{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Use Augmented Random Search to Play BipedalWalker-v3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import sys\n",
    "import logging\n",
    "import itertools\n",
    "\n",
    "import numpy as np\n",
    "np.random.seed(0)\n",
    "import gym\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Route log records of level DEBUG and above to stdout, each prefixed\n",
    "# with an HH:MM:SS timestamp and the level name.\n",
    "logging.basicConfig(level=logging.DEBUG,\n",
    "        format='%(asctime)s [%(levelname)s] %(message)s',\n",
    "        stream=sys.stdout, datefmt='%H:%M:%S')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "00:08:42 [INFO] env: <BipedalWalker<BipedalWalker-v3>>\n",
      "00:08:43 [INFO] action_space: Box(-1.0, 1.0, (4,), float32)\n",
      "00:08:43 [INFO] observation_space: Box(-inf, inf, (24,), float32)\n",
      "00:08:43 [INFO] reward_range: (-inf, inf)\n",
      "00:08:43 [INFO] metadata: {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}\n",
      "00:08:43 [INFO] _max_episode_steps: 1600\n",
      "00:08:43 [INFO] _elapsed_steps: None\n",
      "00:08:43 [INFO] id: BipedalWalker-v3\n",
      "00:08:43 [INFO] entry_point: gym.envs.box2d:BipedalWalker\n",
      "00:08:43 [INFO] reward_threshold: 300\n",
      "00:08:43 [INFO] nondeterministic: False\n",
      "00:08:43 [INFO] max_episode_steps: 1600\n",
      "00:08:43 [INFO] _kwargs: {}\n",
      "00:08:43 [INFO] _env_name: BipedalWalker\n"
     ]
    }
   ],
   "source": [
    "# Create the environment and seed it with 0.\n",
    "env = gym.make('BipedalWalker-v3')\n",
    "env.seed(0)\n",
    "\n",
    "# Log every attribute of the environment object, then of its spec.\n",
    "for key, value in vars(env).items():\n",
    "    logging.info('%s: %s', key, value)\n",
    "for key, value in vars(env.spec).items():\n",
    "    logging.info('%s: %s', key, value)\n",
    "\n",
    "def clip_reward(reward):\n",
    "    return np.clip(reward, -1., 1.)\n",
    "# Wrap `env` so that each step reward is passed through clip_reward.\n",
    "reward_clipped_env = gym.wrappers.TransformReward(env, clip_reward)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def play_episode(env, agent, max_episode_steps=None, mode=None, render=False):\n",
    "    observation, reward, done = env.reset(), 0., False\n",
    "    agent.reset(mode=mode)\n",
    "    episode_reward, elapsed_steps = 0., 0\n",
    "    while True:\n",
    "        action = agent.step(observation, reward, done)\n",
    "        if render:\n",
    "            env.render()\n",
    "        if done:\n",
    "            break\n",
    "        observation, reward, done, _ = env.step(action)\n",
    "        episode_reward += reward\n",
    "        elapsed_steps += 1\n",
    "        if max_episode_steps and elapsed_steps >= max_episode_steps:\n",
    "            break\n",
    "    agent.close()\n",
    "    return episode_reward, elapsed_steps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "class ESAgent:\n",
    "    def __init__(self, env=None, weights=None, bias=None):\n",
    "        if weights is not None:\n",
    "            self.weights = weights\n",
    "        else:\n",
    "            self.weights = np.zeros((env.observation_space.shape[0],\n",
    "                    env.action_space.shape[0]))\n",
    "        if bias is not None:\n",
    "            self.bias = bias\n",
    "        else:\n",
    "            self.bias = np.zeros(env.action_space.shape[0])\n",
    "\n",
    "    def reset(self, mode=None):\n",
    "        pass\n",
    "\n",
    "    def close(self):\n",
    "        pass\n",
    "\n",
    "    def step(self, observation, _reward, _done):\n",
    "        action = np.matmul(observation, self.weights)\n",
    "        return action\n",
    "\n",
    "    def train(self, env, scale=0.06, learning_rate=0.09, population=16):\n",
    "        weight_updates = np.zeros_like(self.weights)\n",
    "        bias_updates = np.zeros_like(self.bias)\n",
    "        for _ in range(population):\n",
    "            weight_delta = scale * np.random.randn(*agent.weights.shape)\n",
    "            bias_delta = scale * np.random.randn(*agent.bias.shape)\n",
    "            pos_agent = ESAgent(weights=self.weights + weight_delta,\n",
    "                    bias=self.bias + bias_delta)\n",
    "            pos_reward, _ = play_episode(env, pos_agent)\n",
    "            neg_agent = ESAgent(weights=self.weights - weight_delta,\n",
    "                    bias=self.bias - bias_delta)\n",
    "            neg_reward, _ = play_episode(env, neg_agent)\n",
    "            weight_updates += (pos_reward - neg_reward) * weight_delta\n",
    "            bias_updates += (pos_reward - neg_reward) * bias_delta\n",
    "        self.weights += learning_rate * weight_updates / population\n",
    "        self.bias += learning_rate * bias_updates / population\n",
    "\n",
    "\n",
    "# Start from an all-zero policy whose parameter shapes come from env's\n",
    "# observation and action spaces.\n",
    "agent = ESAgent(env=env)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "00:08:43 [INFO] ==== train & evaluate ====\n",
      "00:08:55 [DEBUG] evaluate generation 0: reward = -92.21, steps = 112\n",
      "00:09:05 [DEBUG] evaluate generation 1: reward = -92.84, steps = 98\n",
      "00:09:12 [DEBUG] evaluate generation 2: reward = -95.17, steps = 67\n",
      "00:09:15 [DEBUG] evaluate generation 3: reward = -95.99, steps = 79\n",
      "00:09:25 [DEBUG] evaluate generation 4: reward = -93.12, steps = 85\n",
      "00:09:30 [DEBUG] evaluate generation 5: reward = -93.49, steps = 86\n",
      "00:09:40 [DEBUG] evaluate generation 6: reward = -95.27, steps = 75\n",
      "00:09:48 [DEBUG] evaluate generation 7: reward = -97.63, steps = 56\n",
      "00:09:54 [DEBUG] evaluate generation 8: reward = -98.29, steps = 57\n",
      "00:09:57 [DEBUG] evaluate generation 9: reward = -95.33, steps = 66\n",
      "00:10:01 [DEBUG] evaluate generation 10: reward = -98.50, steps = 61\n",
      "00:10:02 [DEBUG] evaluate generation 11: reward = -97.95, steps = 65\n",
      "00:10:06 [DEBUG] evaluate generation 12: reward = -96.33, steps = 71\n",
      "00:10:08 [DEBUG] evaluate generation 13: reward = -101.46, steps = 56\n",
      "00:10:14 [DEBUG] evaluate generation 14: reward = -95.11, steps = 77\n",
      "00:10:15 [DEBUG] evaluate generation 15: reward = -95.66, steps = 66\n",
      "00:10:17 [DEBUG] evaluate generation 16: reward = -94.83, steps = 70\n",
      "00:10:19 [DEBUG] evaluate generation 17: reward = -94.91, steps = 84\n",
      "00:10:21 [DEBUG] evaluate generation 18: reward = -94.79, steps = 72\n",
      "00:10:22 [DEBUG] evaluate generation 19: reward = -95.24, steps = 69\n",
      "00:10:25 [DEBUG] evaluate generation 20: reward = -96.11, steps = 63\n",
      "00:10:26 [DEBUG] evaluate generation 21: reward = -95.91, steps = 65\n",
      "00:10:29 [DEBUG] evaluate generation 22: reward = -97.03, steps = 68\n",
      "00:10:30 [DEBUG] evaluate generation 23: reward = -97.11, steps = 60\n",
      "00:10:31 [DEBUG] evaluate generation 24: reward = -96.64, steps = 61\n",
      "00:10:34 [DEBUG] evaluate generation 25: reward = -97.19, steps = 57\n",
      "00:10:35 [DEBUG] evaluate generation 26: reward = -96.58, steps = 60\n",
      "00:10:37 [DEBUG] evaluate generation 27: reward = -96.82, steps = 60\n",
      "00:10:38 [DEBUG] evaluate generation 28: reward = -95.98, steps = 74\n",
      "00:10:40 [DEBUG] evaluate generation 29: reward = -96.40, steps = 61\n",
      "00:10:43 [DEBUG] evaluate generation 30: reward = -96.83, steps = 58\n",
      "00:10:44 [DEBUG] evaluate generation 31: reward = -96.67, steps = 60\n",
      "00:10:46 [DEBUG] evaluate generation 32: reward = -96.42, steps = 60\n",
      "00:10:47 [DEBUG] evaluate generation 33: reward = -96.41, steps = 60\n",
      "00:10:48 [DEBUG] evaluate generation 34: reward = -96.37, steps = 61\n",
      "00:10:49 [DEBUG] evaluate generation 35: reward = -96.56, steps = 59\n",
      "00:10:51 [DEBUG] evaluate generation 36: reward = -96.05, steps = 62\n",
      "00:10:52 [DEBUG] evaluate generation 37: reward = -96.30, steps = 61\n",
      "00:10:53 [DEBUG] evaluate generation 38: reward = -96.37, steps = 61\n",
      "00:10:55 [DEBUG] evaluate generation 39: reward = -96.63, steps = 60\n",
      "00:10:57 [DEBUG] evaluate generation 40: reward = -96.46, steps = 60\n",
      "00:10:58 [DEBUG] evaluate generation 41: reward = -96.32, steps = 60\n",
      "00:11:00 [DEBUG] evaluate generation 42: reward = -96.32, steps = 60\n",
      "00:11:01 [DEBUG] evaluate generation 43: reward = -96.67, steps = 59\n",
      "00:11:02 [DEBUG] evaluate generation 44: reward = -96.37, steps = 60\n",
      "00:11:03 [DEBUG] evaluate generation 45: reward = -96.56, steps = 59\n",
      "00:11:05 [DEBUG] evaluate generation 46: reward = -96.15, steps = 61\n",
      "00:11:06 [DEBUG] evaluate generation 47: reward = -96.01, steps = 62\n",
      "00:11:09 [DEBUG] evaluate generation 48: reward = -97.49, steps = 56\n",
      "00:11:11 [DEBUG] evaluate generation 49: reward = -97.13, steps = 58\n",
      "00:11:12 [DEBUG] evaluate generation 50: reward = -96.51, steps = 60\n",
      "00:11:14 [DEBUG] evaluate generation 51: reward = -96.34, steps = 61\n",
      "00:11:16 [DEBUG] evaluate generation 52: reward = -96.31, steps = 61\n",
      "00:11:17 [DEBUG] evaluate generation 53: reward = -96.68, steps = 60\n",
      "00:11:19 [DEBUG] evaluate generation 54: reward = -96.22, steps = 61\n",
      "00:11:20 [DEBUG] evaluate generation 55: reward = -95.80, steps = 62\n",
      "00:11:22 [DEBUG] evaluate generation 56: reward = -95.26, steps = 65\n",
      "00:11:24 [DEBUG] evaluate generation 57: reward = -95.04, steps = 68\n",
      "00:11:25 [DEBUG] evaluate generation 58: reward = -94.71, steps = 76\n",
      "00:11:27 [DEBUG] evaluate generation 59: reward = -94.31, steps = 80\n",
      "00:11:28 [DEBUG] evaluate generation 60: reward = -94.27, steps = 92\n",
      "00:11:32 [DEBUG] evaluate generation 61: reward = -96.33, steps = 61\n",
      "00:11:34 [DEBUG] evaluate generation 62: reward = -96.10, steps = 62\n",
      "00:11:35 [DEBUG] evaluate generation 63: reward = -97.89, steps = 58\n",
      "00:11:38 [DEBUG] evaluate generation 64: reward = -95.06, steps = 65\n",
      "00:11:40 [DEBUG] evaluate generation 65: reward = -95.90, steps = 63\n",
      "00:11:42 [DEBUG] evaluate generation 66: reward = -95.79, steps = 63\n",
      "00:11:45 [DEBUG] evaluate generation 67: reward = -97.26, steps = 60\n",
      "00:11:47 [DEBUG] evaluate generation 68: reward = -96.28, steps = 61\n",
      "00:11:48 [DEBUG] evaluate generation 69: reward = -96.86, steps = 60\n",
      "00:11:50 [DEBUG] evaluate generation 70: reward = -97.71, steps = 59\n",
      "00:11:52 [DEBUG] evaluate generation 71: reward = -95.50, steps = 63\n",
      "00:11:53 [DEBUG] evaluate generation 72: reward = -94.78, steps = 72\n",
      "00:11:55 [DEBUG] evaluate generation 73: reward = -94.93, steps = 72\n",
      "00:11:57 [DEBUG] evaluate generation 74: reward = -94.64, steps = 72\n",
      "00:12:01 [DEBUG] evaluate generation 75: reward = -96.15, steps = 61\n",
      "00:12:03 [DEBUG] evaluate generation 76: reward = -96.67, steps = 59\n",
      "00:12:04 [DEBUG] evaluate generation 77: reward = -96.71, steps = 59\n",
      "00:12:06 [DEBUG] evaluate generation 78: reward = -96.60, steps = 61\n",
      "00:12:07 [DEBUG] evaluate generation 79: reward = -95.16, steps = 66\n",
      "00:12:11 [DEBUG] evaluate generation 80: reward = -96.25, steps = 61\n",
      "00:12:12 [DEBUG] evaluate generation 81: reward = -97.80, steps = 58\n",
      "00:12:15 [DEBUG] evaluate generation 82: reward = -96.33, steps = 61\n",
      "00:12:16 [DEBUG] evaluate generation 83: reward = -98.66, steps = 55\n",
      "00:12:18 [DEBUG] evaluate generation 84: reward = -98.18, steps = 55\n",
      "00:12:19 [DEBUG] evaluate generation 85: reward = -97.38, steps = 59\n",
      "00:12:21 [DEBUG] evaluate generation 86: reward = -95.88, steps = 61\n",
      "00:12:23 [DEBUG] evaluate generation 87: reward = -98.22, steps = 56\n",
      "00:12:25 [DEBUG] evaluate generation 88: reward = -96.64, steps = 59\n",
      "00:12:28 [DEBUG] evaluate generation 89: reward = -96.33, steps = 60\n",
      "00:12:30 [DEBUG] evaluate generation 90: reward = -98.53, steps = 54\n",
      "00:12:33 [DEBUG] evaluate generation 91: reward = -96.55, steps = 59\n",
      "00:12:34 [DEBUG] evaluate generation 92: reward = -96.47, steps = 60\n",
      "00:12:35 [DEBUG] evaluate generation 93: reward = -96.34, steps = 60\n",
      "00:12:37 [DEBUG] evaluate generation 94: reward = -98.64, steps = 55\n",
      "00:12:38 [DEBUG] evaluate generation 95: reward = -97.60, steps = 57\n",
      "00:12:40 [DEBUG] evaluate generation 96: reward = -96.51, steps = 59\n",
      "00:12:42 [DEBUG] evaluate generation 97: reward = -97.10, steps = 58\n",
      "00:12:44 [DEBUG] evaluate generation 98: reward = -97.64, steps = 57\n",
      "00:12:45 [DEBUG] evaluate generation 99: reward = -96.19, steps = 61\n",
      "00:12:46 [DEBUG] evaluate generation 100: reward = -95.32, steps = 61\n",
      "00:12:48 [DEBUG] evaluate generation 101: reward = -95.00, steps = 63\n",
      "00:12:50 [DEBUG] evaluate generation 102: reward = -94.94, steps = 63\n",
      "00:12:52 [DEBUG] evaluate generation 103: reward = -95.01, steps = 64\n",
      "00:12:55 [DEBUG] evaluate generation 104: reward = -96.01, steps = 60\n",
      "00:12:57 [DEBUG] evaluate generation 105: reward = -96.23, steps = 59\n",
      "00:12:58 [DEBUG] evaluate generation 106: reward = -95.57, steps = 61\n",
      "00:13:00 [DEBUG] evaluate generation 107: reward = -95.34, steps = 63\n",
      "00:13:01 [DEBUG] evaluate generation 108: reward = -95.27, steps = 64\n",
      "00:13:03 [DEBUG] evaluate generation 109: reward = -95.10, steps = 66\n",
      "00:13:04 [DEBUG] evaluate generation 110: reward = -97.08, steps = 59\n",
      "00:13:06 [DEBUG] evaluate generation 111: reward = -95.23, steps = 67\n",
      "00:13:08 [DEBUG] evaluate generation 112: reward = -94.88, steps = 67\n",
      "00:13:09 [DEBUG] evaluate generation 113: reward = -94.77, steps = 72\n",
      "00:13:11 [DEBUG] evaluate generation 114: reward = -94.83, steps = 73\n",
      "00:13:13 [DEBUG] evaluate generation 115: reward = -95.51, steps = 61\n",
      "00:13:15 [DEBUG] evaluate generation 116: reward = -95.40, steps = 63\n",
      "00:13:16 [DEBUG] evaluate generation 117: reward = -95.14, steps = 69\n",
      "00:13:18 [DEBUG] evaluate generation 118: reward = -95.76, steps = 63\n",
      "00:13:20 [DEBUG] evaluate generation 119: reward = -95.02, steps = 66\n",
      "00:13:21 [DEBUG] evaluate generation 120: reward = -94.87, steps = 70\n",
      "00:13:23 [DEBUG] evaluate generation 121: reward = -94.73, steps = 72\n",
      "00:13:24 [DEBUG] evaluate generation 122: reward = -94.16, steps = 80\n",
      "00:13:27 [DEBUG] evaluate generation 123: reward = -93.68, steps = 79\n",
      "00:13:29 [DEBUG] evaluate generation 124: reward = -94.27, steps = 68\n",
      "00:13:31 [DEBUG] evaluate generation 125: reward = -94.68, steps = 70\n",
      "00:13:32 [DEBUG] evaluate generation 126: reward = -93.89, steps = 77\n",
      "00:13:34 [DEBUG] evaluate generation 127: reward = -93.96, steps = 78\n",
      "00:13:37 [DEBUG] evaluate generation 128: reward = -95.00, steps = 69\n",
      "00:13:38 [DEBUG] evaluate generation 129: reward = -94.10, steps = 74\n",
      "00:13:40 [DEBUG] evaluate generation 130: reward = -94.21, steps = 77\n",
      "00:13:43 [DEBUG] evaluate generation 131: reward = -93.97, steps = 70\n",
      "00:13:45 [DEBUG] evaluate generation 132: reward = -95.44, steps = 62\n",
      "00:13:46 [DEBUG] evaluate generation 133: reward = -94.20, steps = 65\n",
      "00:13:48 [DEBUG] evaluate generation 134: reward = -94.35, steps = 74\n",
      "00:13:50 [DEBUG] evaluate generation 135: reward = -93.54, steps = 78\n",
      "00:13:53 [DEBUG] evaluate generation 136: reward = -93.88, steps = 75\n",
      "00:13:55 [DEBUG] evaluate generation 137: reward = -94.70, steps = 67\n",
      "00:13:56 [DEBUG] evaluate generation 138: reward = -94.56, steps = 69\n",
      "00:13:59 [DEBUG] evaluate generation 139: reward = -95.06, steps = 63\n",
      "00:14:00 [DEBUG] evaluate generation 140: reward = -95.21, steps = 64\n",
      "00:14:02 [DEBUG] evaluate generation 141: reward = -94.77, steps = 67\n",
      "00:14:04 [DEBUG] evaluate generation 142: reward = -95.40, steps = 62\n",
      "00:14:06 [DEBUG] evaluate generation 143: reward = -95.40, steps = 63\n",
      "00:14:08 [DEBUG] evaluate generation 144: reward = -94.87, steps = 66\n",
      "00:14:09 [DEBUG] evaluate generation 145: reward = -94.60, steps = 68\n",
      "00:14:11 [DEBUG] evaluate generation 146: reward = -95.12, steps = 67\n",
      "00:14:13 [DEBUG] evaluate generation 147: reward = -94.26, steps = 73\n",
      "00:14:14 [DEBUG] evaluate generation 148: reward = -94.33, steps = 74\n",
      "00:14:16 [DEBUG] evaluate generation 149: reward = -94.36, steps = 76\n",
      "00:14:17 [DEBUG] evaluate generation 150: reward = -94.21, steps = 78\n",
      "00:14:19 [DEBUG] evaluate generation 151: reward = -94.52, steps = 75\n",
      "00:14:20 [DEBUG] evaluate generation 152: reward = -94.29, steps = 75\n",
      "00:14:22 [DEBUG] evaluate generation 153: reward = -94.08, steps = 73\n",
      "00:14:25 [DEBUG] evaluate generation 154: reward = -94.59, steps = 77\n",
      "00:14:26 [DEBUG] evaluate generation 155: reward = -94.65, steps = 79\n",
      "00:14:28 [DEBUG] evaluate generation 156: reward = -94.53, steps = 80\n",
      "00:14:30 [DEBUG] evaluate generation 157: reward = -94.24, steps = 74\n",
      "00:14:31 [DEBUG] evaluate generation 158: reward = -94.04, steps = 75\n",
      "00:14:33 [DEBUG] evaluate generation 159: reward = -94.46, steps = 72\n",
      "00:14:35 [DEBUG] evaluate generation 160: reward = -94.38, steps = 72\n",
      "00:14:37 [DEBUG] evaluate generation 161: reward = -94.26, steps = 73\n",
      "00:14:38 [DEBUG] evaluate generation 162: reward = -94.59, steps = 72\n",
      "00:14:40 [DEBUG] evaluate generation 163: reward = -94.16, steps = 73\n",
      "00:14:42 [DEBUG] evaluate generation 164: reward = -94.59, steps = 73\n",
      "00:14:43 [DEBUG] evaluate generation 165: reward = -94.27, steps = 73\n",
      "00:14:46 [DEBUG] evaluate generation 166: reward = -93.82, steps = 77\n",
      "00:14:47 [DEBUG] evaluate generation 167: reward = -93.85, steps = 76\n",
      "00:14:51 [DEBUG] evaluate generation 168: reward = -95.21, steps = 78\n",
      "00:14:52 [DEBUG] evaluate generation 169: reward = -95.14, steps = 84\n",
      "00:14:54 [DEBUG] evaluate generation 170: reward = -95.23, steps = 70\n",
      "00:14:56 [DEBUG] evaluate generation 171: reward = -94.82, steps = 81\n",
      "00:14:57 [DEBUG] evaluate generation 172: reward = -94.26, steps = 84\n",
      "00:14:59 [DEBUG] evaluate generation 173: reward = -93.89, steps = 74\n",
      "00:15:02 [DEBUG] evaluate generation 174: reward = -95.51, steps = 64\n",
      "00:15:04 [DEBUG] evaluate generation 175: reward = -95.57, steps = 68\n",
      "00:15:05 [DEBUG] evaluate generation 176: reward = -95.06, steps = 74\n",
      "00:15:07 [DEBUG] evaluate generation 177: reward = -94.93, steps = 80\n",
      "00:15:08 [DEBUG] evaluate generation 178: reward = -94.71, steps = 81\n",
      "00:15:10 [DEBUG] evaluate generation 179: reward = -94.19, steps = 78\n",
      "00:15:13 [DEBUG] evaluate generation 180: reward = -95.45, steps = 66\n",
      "00:15:15 [DEBUG] evaluate generation 181: reward = -95.45, steps = 62\n",
      "00:15:17 [DEBUG] evaluate generation 182: reward = -94.82, steps = 64\n",
      "00:15:18 [DEBUG] evaluate generation 183: reward = -94.57, steps = 66\n",
      "00:15:20 [DEBUG] evaluate generation 184: reward = -96.60, steps = 60\n",
      "00:15:21 [DEBUG] evaluate generation 185: reward = -97.05, steps = 59\n",
      "00:15:22 [DEBUG] evaluate generation 186: reward = -95.44, steps = 62\n",
      "00:15:24 [DEBUG] evaluate generation 187: reward = -96.32, steps = 61\n",
      "00:15:25 [DEBUG] evaluate generation 188: reward = -96.45, steps = 61\n",
      "00:15:27 [DEBUG] evaluate generation 189: reward = -95.30, steps = 62\n",
      "00:15:29 [DEBUG] evaluate generation 190: reward = -95.31, steps = 62\n",
      "00:15:30 [DEBUG] evaluate generation 191: reward = -95.37, steps = 62\n",
      "00:15:31 [DEBUG] evaluate generation 192: reward = -95.92, steps = 61\n",
      "00:15:33 [DEBUG] evaluate generation 193: reward = -95.77, steps = 62\n",
      "00:15:34 [DEBUG] evaluate generation 194: reward = -95.47, steps = 62\n",
      "00:15:36 [DEBUG] evaluate generation 195: reward = -94.82, steps = 64\n",
      "00:15:37 [DEBUG] evaluate generation 196: reward = -94.44, steps = 68\n",
      "00:15:39 [DEBUG] evaluate generation 197: reward = -94.58, steps = 68\n",
      "00:15:41 [DEBUG] evaluate generation 198: reward = -94.77, steps = 66\n",
      "00:15:42 [DEBUG] evaluate generation 199: reward = -95.37, steps = 63\n",
      "00:15:44 [DEBUG] evaluate generation 200: reward = -95.44, steps = 63\n",
      "00:15:45 [DEBUG] evaluate generation 201: reward = -95.32, steps = 62\n",
      "00:15:47 [DEBUG] evaluate generation 202: reward = -94.59, steps = 66\n",
      "00:15:48 [DEBUG] evaluate generation 203: reward = -94.38, steps = 68\n",
      "00:15:51 [DEBUG] evaluate generation 204: reward = -95.68, steps = 63\n",
      "00:15:52 [DEBUG] evaluate generation 205: reward = -95.43, steps = 62\n",
      "00:15:53 [DEBUG] evaluate generation 206: reward = -95.45, steps = 62\n",
      "00:15:55 [DEBUG] evaluate generation 207: reward = -94.71, steps = 65\n",
      "00:15:56 [DEBUG] evaluate generation 208: reward = -94.29, steps = 67\n",
      "00:15:58 [DEBUG] evaluate generation 209: reward = -94.44, steps = 68\n",
      "00:16:00 [DEBUG] evaluate generation 210: reward = -94.43, steps = 69\n",
      "00:16:02 [DEBUG] evaluate generation 211: reward = -94.47, steps = 69\n",
      "00:16:03 [DEBUG] evaluate generation 212: reward = -94.31, steps = 70\n",
      "00:16:05 [DEBUG] evaluate generation 213: reward = -94.41, steps = 70\n",
      "00:16:06 [DEBUG] evaluate generation 214: reward = -94.06, steps = 72\n",
      "00:16:08 [DEBUG] evaluate generation 215: reward = -93.91, steps = 77\n",
      "00:16:09 [DEBUG] evaluate generation 216: reward = -93.98, steps = 77\n",
      "00:16:12 [DEBUG] evaluate generation 217: reward = -94.67, steps = 71\n",
      "00:16:13 [DEBUG] evaluate generation 218: reward = -94.47, steps = 73\n",
      "00:16:15 [DEBUG] evaluate generation 219: reward = -94.40, steps = 73\n",
      "00:16:18 [DEBUG] evaluate generation 220: reward = -95.47, steps = 70\n",
      "00:16:19 [DEBUG] evaluate generation 221: reward = -94.99, steps = 70\n",
      "00:16:21 [DEBUG] evaluate generation 222: reward = -95.05, steps = 73\n",
      "00:16:23 [DEBUG] evaluate generation 223: reward = -94.68, steps = 77\n",
      "00:16:24 [DEBUG] evaluate generation 224: reward = -95.11, steps = 79\n",
      "00:16:26 [DEBUG] evaluate generation 225: reward = -95.04, steps = 77\n",
      "00:16:28 [DEBUG] evaluate generation 226: reward = -94.86, steps = 71\n",
      "00:16:29 [DEBUG] evaluate generation 227: reward = -95.01, steps = 73\n",
      "00:16:31 [DEBUG] evaluate generation 228: reward = -94.71, steps = 73\n",
      "00:16:32 [DEBUG] evaluate generation 229: reward = -94.58, steps = 77\n",
      "00:16:35 [DEBUG] evaluate generation 230: reward = -94.27, steps = 68\n",
      "00:16:37 [DEBUG] evaluate generation 231: reward = -94.34, steps = 70\n",
      "00:16:38 [DEBUG] evaluate generation 232: reward = -93.96, steps = 69\n",
      "00:16:40 [DEBUG] evaluate generation 233: reward = -94.00, steps = 70\n",
      "00:16:41 [DEBUG] evaluate generation 234: reward = -94.14, steps = 69\n",
      "00:16:43 [DEBUG] evaluate generation 235: reward = -94.03, steps = 71\n",
      "00:16:44 [DEBUG] evaluate generation 236: reward = -94.03, steps = 71\n",
      "00:16:47 [DEBUG] evaluate generation 237: reward = -95.40, steps = 62\n",
      "00:16:48 [DEBUG] evaluate generation 238: reward = -94.44, steps = 65\n",
      "00:16:51 [DEBUG] evaluate generation 239: reward = -95.37, steps = 63\n",
      "00:16:52 [DEBUG] evaluate generation 240: reward = -95.19, steps = 63\n",
      "00:16:54 [DEBUG] evaluate generation 241: reward = -95.13, steps = 63\n",
      "00:16:55 [DEBUG] evaluate generation 242: reward = -94.77, steps = 64\n",
      "00:16:56 [DEBUG] evaluate generation 243: reward = -93.87, steps = 67\n",
      "00:16:58 [DEBUG] evaluate generation 244: reward = -93.33, steps = 69\n",
      "00:17:00 [DEBUG] evaluate generation 245: reward = -94.96, steps = 63\n",
      "00:17:02 [DEBUG] evaluate generation 246: reward = -94.44, steps = 65\n",
      "00:17:03 [DEBUG] evaluate generation 247: reward = -93.92, steps = 67\n",
      "00:17:05 [DEBUG] evaluate generation 248: reward = -93.42, steps = 70\n",
      "00:17:06 [DEBUG] evaluate generation 249: reward = -93.56, steps = 69\n",
      "00:17:08 [DEBUG] evaluate generation 250: reward = -94.13, steps = 66\n",
      "00:17:09 [DEBUG] evaluate generation 251: reward = -93.54, steps = 70\n",
      "00:17:11 [DEBUG] evaluate generation 252: reward = -95.31, steps = 65\n",
      "00:17:13 [DEBUG] evaluate generation 253: reward = -94.50, steps = 66\n",
      "00:17:14 [DEBUG] evaluate generation 254: reward = -94.37, steps = 67\n",
      "00:17:16 [DEBUG] evaluate generation 255: reward = -94.08, steps = 70\n",
      "00:17:17 [DEBUG] evaluate generation 256: reward = -96.29, steps = 63\n",
      "00:17:19 [DEBUG] evaluate generation 257: reward = -94.44, steps = 65\n",
      "00:17:20 [DEBUG] evaluate generation 258: reward = -93.96, steps = 68\n",
      "00:17:22 [DEBUG] evaluate generation 259: reward = -94.13, steps = 68\n",
      "00:17:24 [DEBUG] evaluate generation 260: reward = -93.89, steps = 72\n",
      "00:17:25 [DEBUG] evaluate generation 261: reward = -94.49, steps = 66\n",
      "00:17:27 [DEBUG] evaluate generation 262: reward = -94.11, steps = 68\n",
      "00:17:28 [DEBUG] evaluate generation 263: reward = -94.25, steps = 67\n",
      "00:17:30 [DEBUG] evaluate generation 264: reward = -94.67, steps = 64\n",
      "00:17:31 [DEBUG] evaluate generation 265: reward = -95.62, steps = 64\n",
      "00:17:33 [DEBUG] evaluate generation 266: reward = -94.36, steps = 68\n",
      "00:17:35 [DEBUG] evaluate generation 267: reward = -95.35, steps = 63\n",
      "00:17:37 [DEBUG] evaluate generation 268: reward = -95.24, steps = 66\n",
      "00:17:39 [DEBUG] evaluate generation 269: reward = -95.16, steps = 66\n",
      "00:17:40 [DEBUG] evaluate generation 270: reward = -95.04, steps = 66\n",
      "00:17:42 [DEBUG] evaluate generation 271: reward = -94.83, steps = 67\n",
      "00:17:43 [DEBUG] evaluate generation 272: reward = -94.86, steps = 67\n",
      "00:17:45 [DEBUG] evaluate generation 273: reward = -96.36, steps = 63\n",
      "00:17:46 [DEBUG] evaluate generation 274: reward = -96.82, steps = 62\n",
      "00:17:48 [DEBUG] evaluate generation 275: reward = -94.53, steps = 68\n",
      "00:17:50 [DEBUG] evaluate generation 276: reward = -95.33, steps = 64\n",
      "00:17:51 [DEBUG] evaluate generation 277: reward = -94.69, steps = 66\n",
      "00:17:53 [DEBUG] evaluate generation 278: reward = -94.60, steps = 67\n",
      "00:17:54 [DEBUG] evaluate generation 279: reward = -96.39, steps = 62\n",
      "00:17:56 [DEBUG] evaluate generation 280: reward = -93.24, steps = 73\n",
      "00:17:58 [DEBUG] evaluate generation 281: reward = -94.55, steps = 66\n",
      "00:17:59 [DEBUG] evaluate generation 282: reward = -94.32, steps = 67\n",
      "00:18:01 [DEBUG] evaluate generation 283: reward = -92.84, steps = 74\n",
      "00:18:03 [DEBUG] evaluate generation 284: reward = -94.65, steps = 66\n",
      "00:18:05 [DEBUG] evaluate generation 285: reward = -94.60, steps = 66\n",
      "00:18:06 [DEBUG] evaluate generation 286: reward = -93.80, steps = 69\n",
      "00:18:08 [DEBUG] evaluate generation 287: reward = -95.20, steps = 65\n",
      "00:18:09 [DEBUG] evaluate generation 288: reward = -94.70, steps = 65\n",
      "00:18:11 [DEBUG] evaluate generation 289: reward = -92.66, steps = 75\n",
      "00:18:12 [DEBUG] evaluate generation 290: reward = -93.03, steps = 72\n",
      "00:18:14 [DEBUG] evaluate generation 291: reward = -92.56, steps = 75\n",
      "00:18:16 [DEBUG] evaluate generation 292: reward = -93.24, steps = 70\n",
      "00:18:18 [DEBUG] evaluate generation 293: reward = -92.57, steps = 77\n",
      "00:18:20 [DEBUG] evaluate generation 294: reward = -92.31, steps = 78\n",
      "00:18:22 [DEBUG] evaluate generation 295: reward = -92.46, steps = 77\n",
      "00:18:24 [DEBUG] evaluate generation 296: reward = -92.62, steps = 79\n",
      "00:18:26 [DEBUG] evaluate generation 297: reward = -92.91, steps = 75\n",
      "00:18:28 [DEBUG] evaluate generation 298: reward = -93.18, steps = 72\n",
      "00:18:30 [DEBUG] evaluate generation 299: reward = -92.94, steps = 73\n",
      "00:18:31 [DEBUG] evaluate generation 300: reward = -93.35, steps = 70\n",
      "00:18:33 [DEBUG] evaluate generation 301: reward = -93.30, steps = 70\n",
      "00:18:35 [DEBUG] evaluate generation 302: reward = -92.73, steps = 74\n",
      "00:18:38 [DEBUG] evaluate generation 303: reward = -93.95, steps = 67\n",
      "00:18:40 [DEBUG] evaluate generation 304: reward = -94.08, steps = 67\n",
      "00:18:41 [DEBUG] evaluate generation 305: reward = -93.72, steps = 69\n",
      "00:18:43 [DEBUG] evaluate generation 306: reward = -93.50, steps = 69\n",
      "00:18:47 [DEBUG] evaluate generation 307: reward = -94.92, steps = 65\n",
      "00:18:49 [DEBUG] evaluate generation 308: reward = -95.01, steps = 64\n",
      "00:18:50 [DEBUG] evaluate generation 309: reward = -94.76, steps = 66\n",
      "00:18:52 [DEBUG] evaluate generation 310: reward = -93.27, steps = 72\n",
      "00:18:53 [DEBUG] evaluate generation 311: reward = -94.00, steps = 69\n",
      "00:18:55 [DEBUG] evaluate generation 312: reward = -93.47, steps = 70\n",
      "00:18:56 [DEBUG] evaluate generation 313: reward = -93.88, steps = 69\n",
      "00:18:59 [DEBUG] evaluate generation 314: reward = -93.13, steps = 73\n",
      "00:19:01 [DEBUG] evaluate generation 315: reward = -93.25, steps = 74\n",
      "00:19:02 [DEBUG] evaluate generation 316: reward = -93.00, steps = 76\n",
      "00:19:05 [DEBUG] evaluate generation 317: reward = -93.47, steps = 73\n",
      "00:19:08 [DEBUG] evaluate generation 318: reward = -93.64, steps = 72\n",
      "00:19:10 [DEBUG] evaluate generation 319: reward = -93.65, steps = 73\n",
      "00:19:15 [DEBUG] evaluate generation 320: reward = -94.59, steps = 67\n",
      "00:19:17 [DEBUG] evaluate generation 321: reward = -94.50, steps = 68\n",
      "00:19:20 [DEBUG] evaluate generation 322: reward = -94.56, steps = 70\n",
      "00:19:22 [DEBUG] evaluate generation 323: reward = -93.08, steps = 74\n",
      "00:19:24 [DEBUG] evaluate generation 324: reward = -93.01, steps = 74\n",
      "00:19:26 [DEBUG] evaluate generation 325: reward = -93.08, steps = 74\n",
      "00:19:28 [DEBUG] evaluate generation 326: reward = -92.74, steps = 75\n",
      "00:19:30 [DEBUG] evaluate generation 327: reward = -93.33, steps = 72\n",
      "00:19:32 [DEBUG] evaluate generation 328: reward = -92.40, steps = 77\n",
      "00:19:33 [DEBUG] evaluate generation 329: reward = -92.62, steps = 75\n",
      "00:19:35 [DEBUG] evaluate generation 330: reward = -93.00, steps = 73\n",
      "00:19:37 [DEBUG] evaluate generation 331: reward = -92.50, steps = 77\n",
      "00:19:38 [DEBUG] evaluate generation 332: reward = -91.74, steps = 84\n",
      "00:19:41 [DEBUG] evaluate generation 333: reward = -93.22, steps = 74\n",
      "00:19:44 [DEBUG] evaluate generation 334: reward = -93.53, steps = 72\n",
      "00:19:46 [DEBUG] evaluate generation 335: reward = -93.73, steps = 72\n",
      "00:19:50 [DEBUG] evaluate generation 336: reward = -94.11, steps = 71\n",
      "00:19:52 [DEBUG] evaluate generation 337: reward = -92.66, steps = 76\n",
      "00:19:54 [DEBUG] evaluate generation 338: reward = -92.17, steps = 80\n",
      "00:19:56 [DEBUG] evaluate generation 339: reward = -92.76, steps = 80\n",
      "00:19:58 [DEBUG] evaluate generation 340: reward = -92.09, steps = 82\n",
      "00:20:00 [DEBUG] evaluate generation 341: reward = -92.82, steps = 77\n",
      "00:20:02 [DEBUG] evaluate generation 342: reward = -91.99, steps = 80\n",
      "00:20:04 [DEBUG] evaluate generation 343: reward = -91.48, steps = 80\n",
      "00:20:05 [DEBUG] evaluate generation 344: reward = -91.43, steps = 80\n",
      "00:20:07 [DEBUG] evaluate generation 345: reward = -92.42, steps = 82\n",
      "00:20:10 [DEBUG] evaluate generation 346: reward = -91.45, steps = 80\n",
      "00:20:11 [DEBUG] evaluate generation 347: reward = -90.79, steps = 83\n",
      "00:20:13 [DEBUG] evaluate generation 348: reward = -89.91, steps = 88\n",
      "00:20:15 [DEBUG] evaluate generation 349: reward = -89.92, steps = 90\n",
      "00:20:17 [DEBUG] evaluate generation 350: reward = -88.97, steps = 100\n",
      "00:20:20 [DEBUG] evaluate generation 351: reward = -92.78, steps = 82\n",
      "00:20:24 [DEBUG] evaluate generation 352: reward = -92.42, steps = 81\n",
      "00:20:25 [DEBUG] evaluate generation 353: reward = -92.21, steps = 84\n",
      "00:20:28 [DEBUG] evaluate generation 354: reward = -92.18, steps = 85\n",
      "00:20:30 [DEBUG] evaluate generation 355: reward = -92.84, steps = 76\n",
      "00:20:31 [DEBUG] evaluate generation 356: reward = -91.45, steps = 86\n",
      "00:20:34 [DEBUG] evaluate generation 357: reward = -93.04, steps = 76\n",
      "00:20:36 [DEBUG] evaluate generation 358: reward = -92.83, steps = 76\n",
      "00:20:37 [DEBUG] evaluate generation 359: reward = -92.42, steps = 80\n",
      "00:20:40 [DEBUG] evaluate generation 360: reward = -91.71, steps = 86\n",
      "00:20:43 [DEBUG] evaluate generation 361: reward = -93.92, steps = 71\n",
      "00:20:46 [DEBUG] evaluate generation 362: reward = -94.12, steps = 70\n",
      "00:20:48 [DEBUG] evaluate generation 363: reward = -93.92, steps = 70\n",
      "00:20:50 [DEBUG] evaluate generation 364: reward = -93.38, steps = 75\n",
      "00:20:51 [DEBUG] evaluate generation 365: reward = -93.71, steps = 73\n",
      "00:20:53 [DEBUG] evaluate generation 366: reward = -93.30, steps = 80\n",
      "00:20:57 [DEBUG] evaluate generation 367: reward = -93.21, steps = 73\n",
      "00:20:59 [DEBUG] evaluate generation 368: reward = -93.06, steps = 76\n",
      "00:21:01 [DEBUG] evaluate generation 369: reward = -93.35, steps = 74\n",
      "00:21:03 [DEBUG] evaluate generation 370: reward = -92.56, steps = 78\n",
      "00:21:05 [DEBUG] evaluate generation 371: reward = -93.04, steps = 78\n",
      "00:21:07 [DEBUG] evaluate generation 372: reward = -92.26, steps = 77\n",
      "00:21:10 [DEBUG] evaluate generation 373: reward = -94.00, steps = 70\n",
      "00:21:11 [DEBUG] evaluate generation 374: reward = -92.96, steps = 81\n",
      "00:21:13 [DEBUG] evaluate generation 375: reward = -92.62, steps = 79\n",
      "00:21:15 [DEBUG] evaluate generation 376: reward = -92.16, steps = 77\n",
      "00:21:17 [DEBUG] evaluate generation 377: reward = -92.77, steps = 76\n",
      "00:21:20 [DEBUG] evaluate generation 378: reward = -92.81, steps = 81\n",
      "00:21:21 [DEBUG] evaluate generation 379: reward = -92.38, steps = 77\n",
      "00:21:24 [DEBUG] evaluate generation 380: reward = -93.91, steps = 72\n",
      "00:21:25 [DEBUG] evaluate generation 381: reward = -93.03, steps = 78\n",
      "00:21:27 [DEBUG] evaluate generation 382: reward = -93.46, steps = 73\n",
      "00:21:29 [DEBUG] evaluate generation 383: reward = -93.32, steps = 74\n",
      "00:21:31 [DEBUG] evaluate generation 384: reward = -92.69, steps = 78\n",
      "00:21:33 [DEBUG] evaluate generation 385: reward = -92.69, steps = 78\n",
      "00:21:34 [DEBUG] evaluate generation 386: reward = -92.21, steps = 84\n",
      "00:21:36 [DEBUG] evaluate generation 387: reward = -93.12, steps = 76\n",
      "00:21:38 [DEBUG] evaluate generation 388: reward = -92.24, steps = 84\n",
      "00:21:39 [DEBUG] evaluate generation 389: reward = -93.34, steps = 76\n",
      "00:21:41 [DEBUG] evaluate generation 390: reward = -94.03, steps = 75\n",
      "00:21:43 [DEBUG] evaluate generation 391: reward = -93.13, steps = 80\n",
      "00:21:44 [DEBUG] evaluate generation 392: reward = -92.58, steps = 84\n",
      "00:21:46 [DEBUG] evaluate generation 393: reward = -93.03, steps = 78\n",
      "00:21:48 [DEBUG] evaluate generation 394: reward = -91.51, steps = 86\n",
      "00:21:50 [DEBUG] evaluate generation 395: reward = -92.65, steps = 79\n",
      "00:21:52 [DEBUG] evaluate generation 396: reward = -92.20, steps = 82\n",
      "00:21:56 [DEBUG] evaluate generation 397: reward = -93.10, steps = 77\n",
      "00:21:58 [DEBUG] evaluate generation 398: reward = -92.30, steps = 76\n",
      "00:22:00 [DEBUG] evaluate generation 399: reward = -92.45, steps = 76\n",
      "00:22:01 [DEBUG] evaluate generation 400: reward = -92.13, steps = 80\n",
      "00:22:05 [DEBUG] evaluate generation 401: reward = -93.04, steps = 74\n",
      "00:22:07 [DEBUG] evaluate generation 402: reward = -92.31, steps = 78\n",
      "00:22:08 [DEBUG] evaluate generation 403: reward = -92.26, steps = 77\n",
      "00:22:10 [DEBUG] evaluate generation 404: reward = -92.07, steps = 81\n",
      "00:22:12 [DEBUG] evaluate generation 405: reward = -92.82, steps = 77\n",
      "00:22:15 [DEBUG] evaluate generation 406: reward = -92.93, steps = 76\n",
      "00:22:17 [DEBUG] evaluate generation 407: reward = -92.50, steps = 76\n",
      "00:22:18 [DEBUG] evaluate generation 408: reward = -92.18, steps = 78\n",
      "00:22:23 [DEBUG] evaluate generation 409: reward = -93.91, steps = 73\n",
      "00:22:25 [DEBUG] evaluate generation 410: reward = -93.94, steps = 74\n",
      "00:22:26 [DEBUG] evaluate generation 411: reward = -93.49, steps = 83\n",
      "00:22:28 [DEBUG] evaluate generation 412: reward = -93.23, steps = 84\n",
      "00:22:30 [DEBUG] evaluate generation 413: reward = -93.43, steps = 83\n",
      "00:22:31 [DEBUG] evaluate generation 414: reward = -93.43, steps = 88\n",
      "00:22:34 [DEBUG] evaluate generation 415: reward = -93.08, steps = 83\n",
      "00:22:36 [DEBUG] evaluate generation 416: reward = -93.24, steps = 80\n",
      "00:22:38 [DEBUG] evaluate generation 417: reward = -92.73, steps = 82\n",
      "00:22:40 [DEBUG] evaluate generation 418: reward = -92.67, steps = 86\n",
      "00:22:41 [DEBUG] evaluate generation 419: reward = -92.67, steps = 87\n",
      "00:22:43 [DEBUG] evaluate generation 420: reward = -92.49, steps = 84\n",
      "00:22:45 [DEBUG] evaluate generation 421: reward = -92.79, steps = 79\n",
      "00:22:47 [DEBUG] evaluate generation 422: reward = -93.06, steps = 83\n",
      "00:22:49 [DEBUG] evaluate generation 423: reward = -92.83, steps = 91\n",
      "00:22:51 [DEBUG] evaluate generation 424: reward = -92.65, steps = 82\n",
      "00:22:54 [DEBUG] evaluate generation 425: reward = -92.87, steps = 82\n",
      "00:22:56 [DEBUG] evaluate generation 426: reward = -92.85, steps = 80\n",
      "00:22:58 [DEBUG] evaluate generation 427: reward = -92.65, steps = 78\n",
      "00:22:59 [DEBUG] evaluate generation 428: reward = -92.30, steps = 83\n",
      "00:23:01 [DEBUG] evaluate generation 429: reward = -91.94, steps = 85\n",
      "00:23:03 [DEBUG] evaluate generation 430: reward = -92.33, steps = 79\n",
      "00:23:06 [DEBUG] evaluate generation 431: reward = -93.55, steps = 74\n",
      "00:23:08 [DEBUG] evaluate generation 432: reward = -94.08, steps = 71\n",
      "00:23:09 [DEBUG] evaluate generation 433: reward = -92.92, steps = 75\n",
      "00:23:11 [DEBUG] evaluate generation 434: reward = -92.02, steps = 83\n",
      "00:23:12 [DEBUG] evaluate generation 435: reward = -93.06, steps = 76\n",
      "00:23:14 [DEBUG] evaluate generation 436: reward = -92.82, steps = 78\n",
      "00:23:16 [DEBUG] evaluate generation 437: reward = -92.32, steps = 82\n",
      "00:23:18 [DEBUG] evaluate generation 438: reward = -90.83, steps = 84\n",
      "00:23:20 [DEBUG] evaluate generation 439: reward = -92.19, steps = 85\n",
      "00:23:22 [DEBUG] evaluate generation 440: reward = -92.14, steps = 88\n",
      "00:23:24 [DEBUG] evaluate generation 441: reward = -89.61, steps = 91\n",
      "00:23:28 [DEBUG] evaluate generation 442: reward = -91.92, steps = 84\n",
      "00:23:31 [DEBUG] evaluate generation 443: reward = -92.29, steps = 82\n",
      "00:23:33 [DEBUG] evaluate generation 444: reward = -90.76, steps = 86\n",
      "00:23:36 [DEBUG] evaluate generation 445: reward = -92.69, steps = 77\n",
      "00:23:39 [DEBUG] evaluate generation 446: reward = -92.59, steps = 77\n",
      "00:23:40 [DEBUG] evaluate generation 447: reward = -92.49, steps = 81\n",
      "00:23:43 [DEBUG] evaluate generation 448: reward = -92.57, steps = 77\n",
      "00:23:44 [DEBUG] evaluate generation 449: reward = -92.89, steps = 74\n",
      "00:23:46 [DEBUG] evaluate generation 450: reward = -92.66, steps = 77\n",
      "00:23:48 [DEBUG] evaluate generation 451: reward = -92.12, steps = 80\n",
      "00:23:49 [DEBUG] evaluate generation 452: reward = -92.25, steps = 81\n",
      "00:23:52 [DEBUG] evaluate generation 453: reward = -92.51, steps = 78\n",
      "00:23:53 [DEBUG] evaluate generation 454: reward = -93.44, steps = 74\n",
      "00:23:56 [DEBUG] evaluate generation 455: reward = -92.39, steps = 79\n",
      "00:23:58 [DEBUG] evaluate generation 456: reward = -93.64, steps = 72\n",
      "00:23:59 [DEBUG] evaluate generation 457: reward = -92.22, steps = 84\n",
      "00:24:02 [DEBUG] evaluate generation 458: reward = -93.09, steps = 75\n",
      "00:24:04 [DEBUG] evaluate generation 459: reward = -92.26, steps = 80\n",
      "00:24:05 [DEBUG] evaluate generation 460: reward = -92.57, steps = 78\n",
      "00:24:07 [DEBUG] evaluate generation 461: reward = -91.05, steps = 90\n",
      "00:24:09 [DEBUG] evaluate generation 462: reward = -93.11, steps = 77\n",
      "00:24:11 [DEBUG] evaluate generation 463: reward = -89.81, steps = 86\n",
      "00:24:14 [DEBUG] evaluate generation 464: reward = -92.67, steps = 82\n",
      "00:24:17 [DEBUG] evaluate generation 465: reward = -91.59, steps = 88\n",
      "00:24:19 [DEBUG] evaluate generation 466: reward = -91.64, steps = 85\n",
      "00:24:22 [DEBUG] evaluate generation 467: reward = -91.64, steps = 85\n",
      "00:24:25 [DEBUG] evaluate generation 468: reward = -92.01, steps = 82\n",
      "00:24:30 [DEBUG] evaluate generation 469: reward = -91.70, steps = 83\n",
      "00:24:31 [DEBUG] evaluate generation 470: reward = -91.81, steps = 85\n",
      "00:24:33 [DEBUG] evaluate generation 471: reward = -92.36, steps = 83\n",
      "00:24:35 [DEBUG] evaluate generation 472: reward = -91.32, steps = 88\n",
      "00:24:37 [DEBUG] evaluate generation 473: reward = -90.45, steps = 91\n",
      "00:24:40 [DEBUG] evaluate generation 474: reward = -90.49, steps = 89\n",
      "00:24:42 [DEBUG] evaluate generation 475: reward = -90.79, steps = 87\n",
      "00:24:44 [DEBUG] evaluate generation 476: reward = -89.41, steps = 89\n",
      "00:24:46 [DEBUG] evaluate generation 477: reward = -88.61, steps = 100\n",
      "00:24:49 [DEBUG] evaluate generation 478: reward = -92.67, steps = 79\n",
      "00:24:51 [DEBUG] evaluate generation 479: reward = -90.55, steps = 86\n",
      "00:24:57 [DEBUG] evaluate generation 480: reward = -94.30, steps = 67\n",
      "00:24:58 [DEBUG] evaluate generation 481: reward = -94.38, steps = 69\n",
      "00:25:00 [DEBUG] evaluate generation 482: reward = -93.01, steps = 75\n",
      "00:25:01 [DEBUG] evaluate generation 483: reward = -92.05, steps = 81\n",
      "00:25:03 [DEBUG] evaluate generation 484: reward = -91.83, steps = 82\n",
      "00:25:05 [DEBUG] evaluate generation 485: reward = -91.43, steps = 83\n",
      "00:25:06 [DEBUG] evaluate generation 486: reward = -93.30, steps = 75\n",
      "00:25:09 [DEBUG] evaluate generation 487: reward = -91.92, steps = 80\n",
      "00:25:10 [DEBUG] evaluate generation 488: reward = -90.03, steps = 85\n",
      "00:25:12 [DEBUG] evaluate generation 489: reward = -91.01, steps = 83\n",
      "00:25:14 [DEBUG] evaluate generation 490: reward = -88.99, steps = 94\n",
      "00:25:16 [DEBUG] evaluate generation 491: reward = -88.14, steps = 102\n",
      "00:25:19 [DEBUG] evaluate generation 492: reward = -87.99, steps = 98\n",
      "00:25:22 [DEBUG] evaluate generation 493: reward = -88.24, steps = 98\n",
      "00:25:25 [DEBUG] evaluate generation 494: reward = -89.65, steps = 88\n",
      "00:25:27 [DEBUG] evaluate generation 495: reward = -88.19, steps = 97\n",
      "00:25:31 [DEBUG] evaluate generation 496: reward = -90.23, steps = 85\n",
      "00:25:34 [DEBUG] evaluate generation 497: reward = -90.32, steps = 84\n",
      "00:25:35 [DEBUG] evaluate generation 498: reward = -89.24, steps = 87\n",
      "00:25:37 [DEBUG] evaluate generation 499: reward = -92.59, steps = 82\n",
      "00:25:40 [DEBUG] evaluate generation 500: reward = -90.66, steps = 82\n",
      "00:25:42 [DEBUG] evaluate generation 501: reward = -89.09, steps = 87\n",
      "00:25:44 [DEBUG] evaluate generation 502: reward = -89.12, steps = 88\n",
      "00:25:47 [DEBUG] evaluate generation 503: reward = -89.02, steps = 91\n",
      "00:25:49 [DEBUG] evaluate generation 504: reward = -90.75, steps = 88\n",
      "00:25:51 [DEBUG] evaluate generation 505: reward = -90.20, steps = 86\n",
      "00:25:53 [DEBUG] evaluate generation 506: reward = -90.14, steps = 86\n",
      "00:25:54 [DEBUG] evaluate generation 507: reward = -88.79, steps = 91\n",
      "00:25:57 [DEBUG] evaluate generation 508: reward = -89.01, steps = 93\n",
      "00:25:59 [DEBUG] evaluate generation 509: reward = -88.18, steps = 97\n",
      "00:26:02 [DEBUG] evaluate generation 510: reward = -87.74, steps = 99\n",
      "00:26:04 [DEBUG] evaluate generation 511: reward = -87.66, steps = 102\n",
      "00:26:10 [DEBUG] evaluate generation 512: reward = -89.81, steps = 89\n",
      "00:26:12 [DEBUG] evaluate generation 513: reward = -90.73, steps = 86\n",
      "00:26:14 [DEBUG] evaluate generation 514: reward = -90.34, steps = 87\n",
      "00:26:16 [DEBUG] evaluate generation 515: reward = -90.02, steps = 87\n",
      "00:26:18 [DEBUG] evaluate generation 516: reward = -92.04, steps = 83\n",
      "00:26:19 [DEBUG] evaluate generation 517: reward = -89.00, steps = 96\n",
      "00:26:22 [DEBUG] evaluate generation 518: reward = -88.77, steps = 97\n",
      "00:26:24 [DEBUG] evaluate generation 519: reward = -89.15, steps = 94\n",
      "00:26:26 [DEBUG] evaluate generation 520: reward = -87.48, steps = 101\n",
      "00:26:28 [DEBUG] evaluate generation 521: reward = -88.00, steps = 103\n",
      "00:26:30 [DEBUG] evaluate generation 522: reward = -87.68, steps = 105\n",
      "00:26:32 [DEBUG] evaluate generation 523: reward = -86.58, steps = 107\n",
      "00:26:35 [DEBUG] evaluate generation 524: reward = -86.04, steps = 108\n",
      "00:26:38 [DEBUG] evaluate generation 525: reward = -88.93, steps = 94\n",
      "00:26:40 [DEBUG] evaluate generation 526: reward = -88.36, steps = 96\n",
      "00:26:42 [DEBUG] evaluate generation 527: reward = -88.80, steps = 102\n",
      "00:26:44 [DEBUG] evaluate generation 528: reward = -87.51, steps = 103\n",
      "00:26:46 [DEBUG] evaluate generation 529: reward = -86.62, steps = 105\n",
      "00:26:48 [DEBUG] evaluate generation 530: reward = -86.29, steps = 110\n",
      "00:26:51 [DEBUG] evaluate generation 531: reward = -87.47, steps = 99\n",
      "00:26:54 [DEBUG] evaluate generation 532: reward = -87.80, steps = 99\n",
      "00:26:56 [DEBUG] evaluate generation 533: reward = -88.22, steps = 96\n",
      "00:26:58 [DEBUG] evaluate generation 534: reward = -88.29, steps = 100\n",
      "00:27:00 [DEBUG] evaluate generation 535: reward = -88.20, steps = 94\n",
      "00:27:02 [DEBUG] evaluate generation 536: reward = -87.14, steps = 107\n",
      "00:27:04 [DEBUG] evaluate generation 537: reward = -86.95, steps = 104\n",
      "00:27:06 [DEBUG] evaluate generation 538: reward = -86.34, steps = 103\n",
      "00:27:08 [DEBUG] evaluate generation 539: reward = -87.32, steps = 103\n",
      "00:27:10 [DEBUG] evaluate generation 540: reward = -85.30, steps = 109\n",
      "00:27:14 [DEBUG] evaluate generation 541: reward = -83.47, steps = 115\n",
      "00:27:16 [DEBUG] evaluate generation 542: reward = -84.33, steps = 107\n",
      "00:27:18 [DEBUG] evaluate generation 543: reward = -82.28, steps = 115\n",
      "00:27:21 [DEBUG] evaluate generation 544: reward = -81.42, steps = 120\n",
      "00:27:24 [DEBUG] evaluate generation 545: reward = -81.54, steps = 121\n",
      "00:27:26 [DEBUG] evaluate generation 546: reward = -82.79, steps = 112\n",
      "00:27:29 [DEBUG] evaluate generation 547: reward = -80.56, steps = 125\n",
      "00:27:31 [DEBUG] evaluate generation 548: reward = -68.97, steps = 170\n",
      "00:27:35 [DEBUG] evaluate generation 549: reward = -65.92, steps = 197\n",
      "00:27:39 [DEBUG] evaluate generation 550: reward = -81.30, steps = 114\n",
      "00:27:41 [DEBUG] evaluate generation 551: reward = -81.11, steps = 118\n",
      "00:27:43 [DEBUG] evaluate generation 552: reward = -83.24, steps = 107\n",
      "00:27:45 [DEBUG] evaluate generation 553: reward = -72.81, steps = 132\n",
      "00:27:47 [DEBUG] evaluate generation 554: reward = -101.91, steps = 135\n",
      "00:27:50 [DEBUG] evaluate generation 555: reward = -80.77, steps = 114\n",
      "00:27:52 [DEBUG] evaluate generation 556: reward = -82.26, steps = 116\n",
      "00:27:54 [DEBUG] evaluate generation 557: reward = -82.73, steps = 116\n",
      "00:27:56 [DEBUG] evaluate generation 558: reward = -82.38, steps = 116\n",
      "00:27:58 [DEBUG] evaluate generation 559: reward = -78.81, steps = 126\n",
      "00:28:01 [DEBUG] evaluate generation 560: reward = -84.63, steps = 98\n",
      "00:28:04 [DEBUG] evaluate generation 561: reward = -73.29, steps = 166\n",
      "00:28:07 [DEBUG] evaluate generation 562: reward = -76.76, steps = 135\n",
      "00:28:09 [DEBUG] evaluate generation 563: reward = -67.43, steps = 192\n",
      "00:28:12 [DEBUG] evaluate generation 564: reward = -71.69, steps = 155\n",
      "00:28:15 [DEBUG] evaluate generation 565: reward = -83.45, steps = 118\n",
      "00:28:18 [DEBUG] evaluate generation 566: reward = -63.22, steps = 211\n",
      "00:28:21 [DEBUG] evaluate generation 567: reward = -67.10, steps = 233\n",
      "00:28:25 [DEBUG] evaluate generation 568: reward = -66.44, steps = 191\n",
      "00:28:28 [DEBUG] evaluate generation 569: reward = -64.18, steps = 195\n",
      "00:28:32 [DEBUG] evaluate generation 570: reward = -57.64, steps = 212\n",
      "00:28:36 [DEBUG] evaluate generation 571: reward = -53.65, steps = 283\n",
      "00:28:43 [DEBUG] evaluate generation 572: reward = -75.04, steps = 135\n",
      "00:28:45 [DEBUG] evaluate generation 573: reward = -69.18, steps = 155\n",
      "00:28:48 [DEBUG] evaluate generation 574: reward = -75.50, steps = 136\n",
      "00:28:53 [DEBUG] evaluate generation 575: reward = -80.18, steps = 389\n",
      "00:29:02 [DEBUG] evaluate generation 576: reward = -86.52, steps = 104\n",
      "00:29:06 [DEBUG] evaluate generation 577: reward = -75.12, steps = 268\n",
      "00:29:20 [DEBUG] evaluate generation 578: reward = -88.71, steps = 111\n",
      "00:29:24 [DEBUG] evaluate generation 579: reward = -3.23, steps = 721\n",
      "00:29:32 [DEBUG] evaluate generation 580: reward = -53.31, steps = 234\n",
      "00:29:39 [DEBUG] evaluate generation 581: reward = -53.73, steps = 310\n",
      "00:29:45 [DEBUG] evaluate generation 582: reward = 40.25, steps = 1378\n",
      "00:30:00 [DEBUG] evaluate generation 583: reward = -95.35, steps = 121\n",
      "00:30:11 [DEBUG] evaluate generation 584: reward = -107.11, steps = 62\n",
      "00:30:13 [DEBUG] evaluate generation 585: reward = -103.34, steps = 55\n",
      "00:30:15 [DEBUG] evaluate generation 586: reward = -102.47, steps = 57\n",
      "00:30:16 [DEBUG] evaluate generation 587: reward = -96.87, steps = 87\n",
      "00:30:18 [DEBUG] evaluate generation 588: reward = -98.55, steps = 72\n",
      "00:30:20 [DEBUG] evaluate generation 589: reward = -98.40, steps = 73\n",
      "00:30:22 [DEBUG] evaluate generation 590: reward = -93.94, steps = 88\n",
      "00:30:24 [DEBUG] evaluate generation 591: reward = -75.77, steps = 164\n",
      "00:30:26 [DEBUG] evaluate generation 592: reward = -80.02, steps = 158\n",
      "00:30:28 [DEBUG] evaluate generation 593: reward = -73.42, steps = 200\n",
      "00:30:33 [DEBUG] evaluate generation 594: reward = -80.04, steps = 135\n",
      "00:30:43 [DEBUG] evaluate generation 595: reward = 60.82, steps = 1600\n",
      "00:31:00 [DEBUG] evaluate generation 596: reward = -97.49, steps = 83\n",
      "00:31:07 [DEBUG] evaluate generation 597: reward = -128.73, steps = 1600\n",
      "00:31:22 [DEBUG] evaluate generation 598: reward = -98.18, steps = 109\n",
      "00:31:29 [DEBUG] evaluate generation 599: reward = -94.99, steps = 94\n",
      "00:31:30 [DEBUG] evaluate generation 600: reward = -94.69, steps = 90\n",
      "00:31:32 [DEBUG] evaluate generation 601: reward = -93.46, steps = 100\n",
      "00:31:35 [DEBUG] evaluate generation 602: reward = -94.29, steps = 98\n",
      "00:31:37 [DEBUG] evaluate generation 603: reward = -93.04, steps = 103\n",
      "00:31:39 [DEBUG] evaluate generation 604: reward = -94.55, steps = 104\n",
      "00:31:41 [DEBUG] evaluate generation 605: reward = -91.30, steps = 106\n",
      "00:31:44 [DEBUG] evaluate generation 606: reward = -95.63, steps = 94\n",
      "00:31:46 [DEBUG] evaluate generation 607: reward = -90.81, steps = 108\n",
      "00:31:48 [DEBUG] evaluate generation 608: reward = -92.17, steps = 103\n",
      "00:31:51 [DEBUG] evaluate generation 609: reward = -91.89, steps = 106\n",
      "00:31:53 [DEBUG] evaluate generation 610: reward = -92.40, steps = 104\n",
      "00:31:55 [DEBUG] evaluate generation 611: reward = -91.65, steps = 102\n",
      "00:31:58 [DEBUG] evaluate generation 612: reward = -94.01, steps = 95\n",
      "00:32:00 [DEBUG] evaluate generation 613: reward = -90.92, steps = 106\n",
      "00:32:02 [DEBUG] evaluate generation 614: reward = -92.46, steps = 96\n",
      "00:32:04 [DEBUG] evaluate generation 615: reward = -91.80, steps = 107\n",
      "00:32:07 [DEBUG] evaluate generation 616: reward = -88.82, steps = 107\n",
      "00:32:09 [DEBUG] evaluate generation 617: reward = -90.34, steps = 107\n",
      "00:32:12 [DEBUG] evaluate generation 618: reward = -91.84, steps = 105\n",
      "00:32:14 [DEBUG] evaluate generation 619: reward = -87.39, steps = 124\n",
      "00:32:18 [DEBUG] evaluate generation 620: reward = -88.07, steps = 124\n",
      "00:32:22 [DEBUG] evaluate generation 621: reward = -87.00, steps = 111\n",
      "00:32:29 [DEBUG] evaluate generation 622: reward = -83.72, steps = 126\n",
      "00:32:41 [DEBUG] evaluate generation 623: reward = 30.28, steps = 1600\n",
      "00:33:01 [DEBUG] evaluate generation 624: reward = -115.19, steps = 83\n",
      "00:33:03 [DEBUG] evaluate generation 625: reward = -64.10, steps = 323\n",
      "00:33:13 [DEBUG] evaluate generation 626: reward = -78.63, steps = 527\n",
      "00:33:29 [DEBUG] evaluate generation 627: reward = -45.67, steps = 609\n",
      "00:33:37 [DEBUG] evaluate generation 628: reward = -8.58, steps = 1600\n",
      "00:34:01 [DEBUG] evaluate generation 629: reward = -97.06, steps = 93\n",
      "00:34:03 [DEBUG] evaluate generation 630: reward = -88.63, steps = 144\n",
      "00:34:06 [DEBUG] evaluate generation 631: reward = -56.30, steps = 340\n",
      "00:34:12 [DEBUG] evaluate generation 632: reward = 53.08, steps = 1600\n",
      "00:34:40 [DEBUG] evaluate generation 633: reward = -98.89, steps = 68\n",
      "00:34:41 [DEBUG] evaluate generation 634: reward = -94.84, steps = 81\n",
      "00:34:43 [DEBUG] evaluate generation 635: reward = -94.63, steps = 83\n",
      "00:34:46 [DEBUG] evaluate generation 636: reward = -79.27, steps = 154\n",
      "00:34:52 [DEBUG] evaluate generation 637: reward = -71.75, steps = 183\n",
      "00:35:12 [DEBUG] evaluate generation 638: reward = -93.78, steps = 93\n",
      "00:35:18 [DEBUG] evaluate generation 639: reward = -15.95, steps = 857\n",
      "00:35:34 [DEBUG] evaluate generation 640: reward = 65.04, steps = 1600\n",
      "00:35:59 [DEBUG] evaluate generation 641: reward = -97.11, steps = 80\n",
      "00:36:04 [DEBUG] evaluate generation 642: reward = 135.59, steps = 1600\n",
      "00:36:28 [DEBUG] evaluate generation 643: reward = -115.13, steps = 70\n",
      "00:36:30 [DEBUG] evaluate generation 644: reward = -111.53, steps = 71\n",
      "00:36:32 [DEBUG] evaluate generation 645: reward = -77.57, steps = 169\n",
      "00:36:36 [DEBUG] evaluate generation 646: reward = -71.62, steps = 402\n",
      "00:36:44 [DEBUG] evaluate generation 647: reward = -62.37, steps = 399\n",
      "00:37:01 [DEBUG] evaluate generation 648: reward = -68.73, steps = 307\n",
      "00:37:08 [DEBUG] evaluate generation 649: reward = -77.86, steps = 1600\n",
      "00:37:35 [DEBUG] evaluate generation 650: reward = -47.39, steps = 439\n",
      "00:37:46 [DEBUG] evaluate generation 651: reward = 20.01, steps = 1100\n",
      "00:37:59 [DEBUG] evaluate generation 652: reward = 56.53, steps = 1600\n",
      "00:38:29 [DEBUG] evaluate generation 653: reward = 69.16, steps = 1600\n",
      "00:38:58 [DEBUG] evaluate generation 654: reward = 98.09, steps = 1600\n",
      "00:39:24 [DEBUG] evaluate generation 655: reward = 45.66, steps = 1600\n",
      "00:39:54 [DEBUG] evaluate generation 656: reward = -29.98, steps = 795\n",
      "00:40:13 [DEBUG] evaluate generation 657: reward = -38.59, steps = 1600\n",
      "00:40:45 [DEBUG] evaluate generation 658: reward = 1.47, steps = 1600\n",
      "00:41:15 [DEBUG] evaluate generation 659: reward = 114.46, steps = 1600\n",
      "00:41:44 [DEBUG] evaluate generation 660: reward = 24.30, steps = 1026\n",
      "00:42:02 [DEBUG] evaluate generation 661: reward = 25.24, steps = 1600\n",
      "00:42:33 [DEBUG] evaluate generation 662: reward = 26.67, steps = 1600\n",
      "00:43:02 [DEBUG] evaluate generation 663: reward = 197.75, steps = 1600\n",
      "00:43:27 [DEBUG] evaluate generation 664: reward = 93.20, steps = 1600\n",
      "00:43:57 [DEBUG] evaluate generation 665: reward = 168.31, steps = 1600\n",
      "00:44:22 [DEBUG] evaluate generation 666: reward = 127.65, steps = 1600\n",
      "00:44:51 [DEBUG] evaluate generation 667: reward = -70.96, steps = 317\n",
      "00:45:07 [DEBUG] evaluate generation 668: reward = -25.58, steps = 1600\n",
      "00:45:38 [DEBUG] evaluate generation 669: reward = -9.13, steps = 1600\n",
      "00:46:08 [DEBUG] evaluate generation 670: reward = 63.14, steps = 1600\n",
      "00:46:38 [DEBUG] evaluate generation 671: reward = 10.36, steps = 1600\n",
      "00:47:06 [DEBUG] evaluate generation 672: reward = -121.13, steps = 98\n",
      "00:47:13 [DEBUG] evaluate generation 673: reward = 83.24, steps = 1600\n",
      "00:47:38 [DEBUG] evaluate generation 674: reward = -42.57, steps = 1577\n",
      "00:48:04 [DEBUG] evaluate generation 675: reward = 71.36, steps = 1600\n",
      "00:48:28 [DEBUG] evaluate generation 676: reward = -113.17, steps = 777\n",
      "00:48:53 [DEBUG] evaluate generation 677: reward = -64.53, steps = 654\n",
      "00:49:16 [DEBUG] evaluate generation 678: reward = 56.95, steps = 1600\n",
      "00:49:43 [DEBUG] evaluate generation 679: reward = -97.53, steps = 1600\n",
      "00:50:06 [DEBUG] evaluate generation 680: reward = -108.92, steps = 69\n",
      "00:50:20 [DEBUG] evaluate generation 681: reward = 78.01, steps = 1600\n",
      "00:50:34 [DEBUG] evaluate generation 682: reward = -115.59, steps = 62\n",
      "00:50:36 [DEBUG] evaluate generation 683: reward = -109.30, steps = 70\n",
      "00:50:44 [DEBUG] evaluate generation 684: reward = -76.15, steps = 239\n",
      "00:50:53 [DEBUG] evaluate generation 685: reward = -84.03, steps = 330\n",
      "00:51:17 [DEBUG] evaluate generation 686: reward = -118.13, steps = 199\n",
      "00:51:20 [DEBUG] evaluate generation 687: reward = -99.18, steps = 166\n",
      "00:51:22 [DEBUG] evaluate generation 688: reward = -89.39, steps = 173\n",
      "00:51:25 [DEBUG] evaluate generation 689: reward = -114.47, steps = 100\n",
      "00:51:29 [DEBUG] evaluate generation 690: reward = -66.70, steps = 370\n",
      "00:51:32 [DEBUG] evaluate generation 691: reward = -110.66, steps = 98\n",
      "00:51:35 [DEBUG] evaluate generation 692: reward = -84.90, steps = 169\n",
      "00:51:39 [DEBUG] evaluate generation 693: reward = -89.77, steps = 339\n",
      "00:51:42 [DEBUG] evaluate generation 694: reward = -106.46, steps = 134\n",
      "00:51:46 [DEBUG] evaluate generation 695: reward = -91.23, steps = 196\n",
      "00:51:49 [DEBUG] evaluate generation 696: reward = -107.07, steps = 106\n",
      "00:51:55 [DEBUG] evaluate generation 697: reward = -82.24, steps = 124\n",
      "00:52:01 [DEBUG] evaluate generation 698: reward = -64.66, steps = 316\n",
      "00:52:06 [DEBUG] evaluate generation 699: reward = -91.69, steps = 115\n",
      "00:52:12 [DEBUG] evaluate generation 700: reward = -33.37, steps = 472\n",
      "00:52:17 [DEBUG] evaluate generation 701: reward = -94.24, steps = 108\n",
      "00:52:23 [DEBUG] evaluate generation 702: reward = -58.26, steps = 747\n",
      "00:52:32 [DEBUG] evaluate generation 703: reward = -26.80, steps = 801\n",
      "00:52:53 [DEBUG] evaluate generation 704: reward = -57.74, steps = 1494\n",
      "00:53:21 [DEBUG] evaluate generation 705: reward = 158.33, steps = 1600\n",
      "00:53:38 [DEBUG] evaluate generation 706: reward = -97.25, steps = 132\n",
      "00:53:48 [DEBUG] evaluate generation 707: reward = -125.16, steps = 121\n",
      "00:53:56 [DEBUG] evaluate generation 708: reward = 42.08, steps = 1438\n",
      "00:54:09 [DEBUG] evaluate generation 709: reward = 115.54, steps = 1600\n",
      "00:54:39 [DEBUG] evaluate generation 710: reward = -24.86, steps = 630\n",
      "00:54:49 [DEBUG] evaluate generation 711: reward = 126.28, steps = 1600\n",
      "00:55:17 [DEBUG] evaluate generation 712: reward = -32.66, steps = 500\n",
      "00:55:38 [DEBUG] evaluate generation 713: reward = 87.51, steps = 1600\n",
      "00:56:02 [DEBUG] evaluate generation 714: reward = -62.11, steps = 496\n",
      "00:56:26 [DEBUG] evaluate generation 715: reward = 166.12, steps = 1600\n",
      "00:56:55 [DEBUG] evaluate generation 716: reward = 126.81, steps = 1600\n",
      "00:57:25 [DEBUG] evaluate generation 717: reward = 183.11, steps = 1600\n",
      "00:57:53 [DEBUG] evaluate generation 718: reward = 120.87, steps = 1600\n",
      "00:58:23 [DEBUG] evaluate generation 719: reward = 97.14, steps = 1600\n",
      "00:58:54 [DEBUG] evaluate generation 720: reward = 77.86, steps = 1600\n",
      "00:59:22 [DEBUG] evaluate generation 721: reward = -72.93, steps = 429\n",
      "00:59:39 [DEBUG] evaluate generation 722: reward = 176.06, steps = 1600\n",
      "01:00:04 [DEBUG] evaluate generation 723: reward = -121.92, steps = 1600\n",
      "01:00:33 [DEBUG] evaluate generation 724: reward = 121.38, steps = 1600\n",
      "01:01:03 [DEBUG] evaluate generation 725: reward = -97.63, steps = 90\n",
      "01:01:07 [DEBUG] evaluate generation 726: reward = -102.87, steps = 133\n",
      "01:01:17 [DEBUG] evaluate generation 727: reward = -30.24, steps = 710\n",
      "01:01:40 [DEBUG] evaluate generation 728: reward = -95.48, steps = 107\n",
      "01:01:43 [DEBUG] evaluate generation 729: reward = -90.10, steps = 160\n",
      "01:01:48 [DEBUG] evaluate generation 730: reward = -84.08, steps = 201\n",
      "01:01:55 [DEBUG] evaluate generation 731: reward = -53.14, steps = 335\n",
      "01:02:12 [DEBUG] evaluate generation 732: reward = 122.44, steps = 1600\n",
      "01:02:42 [DEBUG] evaluate generation 733: reward = -139.17, steps = 1600\n",
      "01:03:06 [DEBUG] evaluate generation 734: reward = -99.76, steps = 97\n",
      "01:03:28 [DEBUG] evaluate generation 735: reward = 110.30, steps = 1600\n",
      "01:03:56 [DEBUG] evaluate generation 736: reward = 104.57, steps = 1600\n",
      "01:04:23 [DEBUG] evaluate generation 737: reward = 92.69, steps = 1600\n",
      "01:04:49 [DEBUG] evaluate generation 738: reward = 102.10, steps = 1600\n",
      "01:05:18 [DEBUG] evaluate generation 739: reward = 121.05, steps = 1600\n",
      "01:05:47 [DEBUG] evaluate generation 740: reward = 97.34, steps = 1600\n",
      "01:06:17 [DEBUG] evaluate generation 741: reward = -27.50, steps = 966\n",
      "01:06:48 [DEBUG] evaluate generation 742: reward = -77.51, steps = 579\n",
      "01:07:16 [DEBUG] evaluate generation 743: reward = -16.90, steps = 1019\n",
      "01:07:43 [DEBUG] evaluate generation 744: reward = -125.51, steps = 1600\n",
      "01:08:10 [DEBUG] evaluate generation 745: reward = 136.72, steps = 1600\n",
      "01:08:33 [DEBUG] evaluate generation 746: reward = -81.06, steps = 346\n",
      "01:08:58 [DEBUG] evaluate generation 747: reward = 143.35, steps = 1600\n",
      "01:09:28 [DEBUG] evaluate generation 748: reward = 129.72, steps = 1600\n",
      "01:09:55 [DEBUG] evaluate generation 749: reward = 86.71, steps = 1600\n",
      "01:10:25 [DEBUG] evaluate generation 750: reward = 114.81, steps = 1600\n",
      "01:10:54 [DEBUG] evaluate generation 751: reward = 117.70, steps = 1600\n",
      "01:11:23 [DEBUG] evaluate generation 752: reward = 140.29, steps = 1600\n",
      "01:11:52 [DEBUG] evaluate generation 753: reward = 141.92, steps = 1600\n",
      "01:12:21 [DEBUG] evaluate generation 754: reward = 117.98, steps = 1600\n",
      "01:12:50 [DEBUG] evaluate generation 755: reward = 122.68, steps = 1600\n",
      "01:13:17 [DEBUG] evaluate generation 756: reward = 164.29, steps = 1600\n",
      "01:13:45 [DEBUG] evaluate generation 757: reward = 170.40, steps = 1600\n",
      "01:14:14 [DEBUG] evaluate generation 758: reward = -89.30, steps = 327\n",
      "01:14:38 [DEBUG] evaluate generation 759: reward = 118.54, steps = 1600\n",
      "01:15:07 [DEBUG] evaluate generation 760: reward = 161.18, steps = 1600\n",
      "01:15:35 [DEBUG] evaluate generation 761: reward = 30.81, steps = 1001\n",
      "01:15:53 [DEBUG] evaluate generation 762: reward = 47.21, steps = 1600\n",
      "01:16:23 [DEBUG] evaluate generation 763: reward = 59.25, steps = 1600\n",
      "01:16:53 [DEBUG] evaluate generation 764: reward = 95.86, steps = 1600\n",
      "01:17:22 [DEBUG] evaluate generation 765: reward = 157.91, steps = 1600\n",
      "01:17:53 [DEBUG] evaluate generation 766: reward = 150.65, steps = 1600\n",
      "01:18:24 [DEBUG] evaluate generation 767: reward = 129.11, steps = 1600\n",
      "01:18:54 [DEBUG] evaluate generation 768: reward = 124.15, steps = 1600\n",
      "01:19:26 [DEBUG] evaluate generation 769: reward = 185.81, steps = 1600\n",
      "01:19:57 [DEBUG] evaluate generation 770: reward = 141.42, steps = 1600\n",
      "01:20:27 [DEBUG] evaluate generation 771: reward = 4.72, steps = 1097\n",
      "01:20:59 [DEBUG] evaluate generation 772: reward = 204.74, steps = 1600\n",
      "01:21:26 [DEBUG] evaluate generation 773: reward = -92.62, steps = 149\n",
      "01:21:54 [DEBUG] evaluate generation 774: reward = 149.02, steps = 1600\n",
      "01:22:25 [DEBUG] evaluate generation 775: reward = 190.06, steps = 1600\n",
      "01:22:51 [DEBUG] evaluate generation 776: reward = 144.96, steps = 1600\n",
      "01:23:20 [DEBUG] evaluate generation 777: reward = 170.60, steps = 1600\n",
      "01:23:51 [DEBUG] evaluate generation 778: reward = 150.15, steps = 1600\n",
      "01:24:21 [DEBUG] evaluate generation 779: reward = 164.23, steps = 1600\n",
      "01:24:50 [DEBUG] evaluate generation 780: reward = 158.35, steps = 1600\n",
      "01:25:19 [DEBUG] evaluate generation 781: reward = -51.99, steps = 591\n",
      "01:25:45 [DEBUG] evaluate generation 782: reward = 190.31, steps = 1600\n",
      "01:26:10 [DEBUG] evaluate generation 783: reward = 17.73, steps = 1276\n",
      "01:26:36 [DEBUG] evaluate generation 784: reward = 219.54, steps = 1600\n",
      "01:27:04 [DEBUG] evaluate generation 785: reward = 192.08, steps = 1600\n",
      "01:27:32 [DEBUG] evaluate generation 786: reward = 197.40, steps = 1600\n",
      "01:27:56 [DEBUG] evaluate generation 787: reward = 98.78, steps = 1600\n",
      "01:28:26 [DEBUG] evaluate generation 788: reward = 108.58, steps = 1600\n",
      "01:28:57 [DEBUG] evaluate generation 789: reward = 131.42, steps = 1600\n",
      "01:29:28 [DEBUG] evaluate generation 790: reward = 206.52, steps = 1600\n",
      "01:29:58 [DEBUG] evaluate generation 791: reward = 200.44, steps = 1600\n",
      "01:30:31 [DEBUG] evaluate generation 792: reward = 196.37, steps = 1600\n",
      "01:30:59 [DEBUG] evaluate generation 793: reward = -49.54, steps = 1055\n",
      "01:31:25 [DEBUG] evaluate generation 794: reward = -13.57, steps = 734\n",
      "01:31:54 [DEBUG] evaluate generation 795: reward = 223.32, steps = 1556\n",
      "01:32:16 [DEBUG] evaluate generation 796: reward = 183.09, steps = 1600\n",
      "01:32:45 [DEBUG] evaluate generation 797: reward = 181.30, steps = 1600\n",
      "01:33:12 [DEBUG] evaluate generation 798: reward = 164.70, steps = 1600\n",
      "01:33:43 [DEBUG] evaluate generation 799: reward = 194.39, steps = 1600\n",
      "01:34:11 [DEBUG] evaluate generation 800: reward = 204.27, steps = 1600\n",
      "01:34:42 [DEBUG] evaluate generation 801: reward = 203.59, steps = 1600\n",
      "01:35:14 [DEBUG] evaluate generation 802: reward = 208.83, steps = 1600\n",
      "01:35:41 [DEBUG] evaluate generation 803: reward = -18.96, steps = 625\n",
      "01:36:05 [DEBUG] evaluate generation 804: reward = 160.46, steps = 1600\n",
      "01:36:35 [DEBUG] evaluate generation 805: reward = 208.16, steps = 1600\n",
      "01:37:05 [DEBUG] evaluate generation 806: reward = 190.17, steps = 1600\n",
      "01:37:34 [DEBUG] evaluate generation 807: reward = 170.70, steps = 1600\n",
      "01:38:05 [DEBUG] evaluate generation 808: reward = 144.47, steps = 1600\n",
      "01:38:35 [DEBUG] evaluate generation 809: reward = 219.74, steps = 1583\n",
      "01:39:04 [DEBUG] evaluate generation 810: reward = 215.70, steps = 1581\n",
      "01:39:27 [DEBUG] evaluate generation 811: reward = 39.29, steps = 1403\n",
      "01:39:53 [DEBUG] evaluate generation 812: reward = 197.13, steps = 1600\n",
      "01:40:20 [DEBUG] evaluate generation 813: reward = 177.28, steps = 1600\n",
      "01:40:48 [DEBUG] evaluate generation 814: reward = 209.48, steps = 1600\n",
      "01:41:15 [DEBUG] evaluate generation 815: reward = 18.35, steps = 1028\n",
      "01:41:43 [DEBUG] evaluate generation 816: reward = 227.63, steps = 1461\n",
      "01:42:06 [DEBUG] evaluate generation 817: reward = 217.51, steps = 1574\n",
      "01:42:34 [DEBUG] evaluate generation 818: reward = 199.50, steps = 1600\n",
      "01:43:06 [DEBUG] evaluate generation 819: reward = 215.12, steps = 1597\n",
      "01:43:32 [DEBUG] evaluate generation 820: reward = 208.63, steps = 1600\n",
      "01:44:02 [DEBUG] evaluate generation 821: reward = 224.07, steps = 1486\n",
      "01:44:30 [DEBUG] evaluate generation 822: reward = 210.61, steps = 1600\n",
      "01:44:57 [DEBUG] evaluate generation 823: reward = 220.62, steps = 1514\n",
      "01:45:25 [DEBUG] evaluate generation 824: reward = 194.16, steps = 1600\n",
      "01:45:56 [DEBUG] evaluate generation 825: reward = 204.81, steps = 1600\n",
      "01:45:56 [INFO] ==== test ====\n",
      "01:45:57 [DEBUG] test episode 0: reward = 198.63, steps = 1600\n",
      "01:45:57 [DEBUG] test episode 1: reward = 214.30, steps = 1600\n",
      "01:45:58 [DEBUG] test episode 2: reward = 187.62, steps = 1600\n",
      "01:45:59 [DEBUG] test episode 3: reward = 188.54, steps = 1600\n",
      "01:46:00 [DEBUG] test episode 4: reward = 201.10, steps = 1600\n",
      "01:46:01 [DEBUG] test episode 5: reward = 191.86, steps = 1600\n",
      "01:46:02 [DEBUG] test episode 6: reward = 169.99, steps = 1600\n",
      "01:46:03 [DEBUG] test episode 7: reward = 219.32, steps = 1549\n",
      "01:46:04 [DEBUG] test episode 8: reward = 206.25, steps = 1600\n",
      "01:46:05 [DEBUG] test episode 9: reward = 181.40, steps = 1600\n",
      "01:46:05 [DEBUG] test episode 10: reward = 159.98, steps = 1600\n",
      "01:46:06 [DEBUG] test episode 11: reward = 185.03, steps = 1600\n",
      "01:46:07 [DEBUG] test episode 12: reward = 208.14, steps = 1600\n",
      "01:46:08 [DEBUG] test episode 13: reward = 199.01, steps = 1600\n",
      "01:46:09 [DEBUG] test episode 14: reward = 142.58, steps = 1600\n",
      "01:46:10 [DEBUG] test episode 15: reward = 157.67, steps = 1600\n",
      "01:46:11 [DEBUG] test episode 16: reward = 178.38, steps = 1600\n",
      "01:46:12 [DEBUG] test episode 17: reward = 184.99, steps = 1600\n",
      "01:46:13 [DEBUG] test episode 18: reward = 196.74, steps = 1600\n",
      "01:46:14 [DEBUG] test episode 19: reward = 183.32, steps = 1600\n",
      "01:46:15 [DEBUG] test episode 20: reward = 209.74, steps = 1600\n",
      "01:46:16 [DEBUG] test episode 21: reward = 164.58, steps = 1600\n",
      "01:46:16 [DEBUG] test episode 22: reward = 192.50, steps = 1600\n",
      "01:46:17 [DEBUG] test episode 23: reward = 24.11, steps = 1542\n",
      "01:46:18 [DEBUG] test episode 24: reward = 170.73, steps = 1600\n",
      "01:46:19 [DEBUG] test episode 25: reward = 198.98, steps = 1600\n",
      "01:46:20 [DEBUG] test episode 26: reward = 186.42, steps = 1600\n",
      "01:46:21 [DEBUG] test episode 27: reward = 205.76, steps = 1600\n",
      "01:46:22 [DEBUG] test episode 28: reward = 205.77, steps = 1600\n",
      "01:46:23 [DEBUG] test episode 29: reward = 200.57, steps = 1600\n",
      "01:46:24 [DEBUG] test episode 30: reward = 194.09, steps = 1600\n",
      "01:46:24 [DEBUG] test episode 31: reward = 200.73, steps = 1600\n",
      "01:46:25 [DEBUG] test episode 32: reward = 213.55, steps = 1572\n",
      "01:46:26 [DEBUG] test episode 33: reward = 202.87, steps = 1600\n",
      "01:46:27 [DEBUG] test episode 34: reward = 210.49, steps = 1600\n",
      "01:46:28 [DEBUG] test episode 35: reward = 53.06, steps = 1543\n",
      "01:46:29 [DEBUG] test episode 36: reward = 174.22, steps = 1600\n",
      "01:46:30 [DEBUG] test episode 37: reward = 162.57, steps = 1600\n",
      "01:46:31 [DEBUG] test episode 38: reward = 183.49, steps = 1600\n",
      "01:46:31 [DEBUG] test episode 39: reward = 206.19, steps = 1600\n",
      "01:46:32 [DEBUG] test episode 40: reward = 215.01, steps = 1591\n",
      "01:46:33 [DEBUG] test episode 41: reward = 189.69, steps = 1600\n",
      "01:46:34 [DEBUG] test episode 42: reward = 212.72, steps = 1600\n",
      "01:46:35 [DEBUG] test episode 43: reward = 203.49, steps = 1600\n",
      "01:46:36 [DEBUG] test episode 44: reward = 178.35, steps = 1600\n",
      "01:46:36 [DEBUG] test episode 45: reward = 216.92, steps = 1546\n",
      "01:46:37 [DEBUG] test episode 46: reward = 179.52, steps = 1600\n",
      "01:46:38 [DEBUG] test episode 47: reward = 221.49, steps = 1514\n",
      "01:46:39 [DEBUG] test episode 48: reward = 201.52, steps = 1600\n",
      "01:46:40 [DEBUG] test episode 49: reward = 194.13, steps = 1600\n",
      "01:46:41 [DEBUG] test episode 50: reward = 214.08, steps = 1598\n",
      "01:46:42 [DEBUG] test episode 51: reward = 201.02, steps = 1600\n",
      "01:46:42 [DEBUG] test episode 52: reward = 209.53, steps = 1600\n",
      "01:46:43 [DEBUG] test episode 53: reward = 189.02, steps = 1600\n",
      "01:46:44 [DEBUG] test episode 54: reward = 218.71, steps = 1526\n",
      "01:46:45 [DEBUG] test episode 55: reward = 206.27, steps = 1600\n",
      "01:46:46 [DEBUG] test episode 56: reward = 210.93, steps = 1599\n",
      "01:46:47 [DEBUG] test episode 57: reward = 217.93, steps = 1563\n",
      "01:46:48 [DEBUG] test episode 58: reward = 209.36, steps = 1600\n",
      "01:46:48 [DEBUG] test episode 59: reward = 220.93, steps = 1529\n",
      "01:46:49 [DEBUG] test episode 60: reward = 220.72, steps = 1530\n",
      "01:46:50 [DEBUG] test episode 61: reward = 207.05, steps = 1600\n",
      "01:46:51 [DEBUG] test episode 62: reward = 179.34, steps = 1600\n",
      "01:46:52 [DEBUG] test episode 63: reward = 200.20, steps = 1600\n",
      "01:46:53 [DEBUG] test episode 64: reward = 190.57, steps = 1600\n",
      "01:46:54 [DEBUG] test episode 65: reward = 184.90, steps = 1600\n",
      "01:46:54 [DEBUG] test episode 66: reward = 220.28, steps = 1517\n",
      "01:46:55 [DEBUG] test episode 67: reward = 172.18, steps = 1600\n",
      "01:46:56 [DEBUG] test episode 68: reward = 214.25, steps = 1584\n",
      "01:46:57 [DEBUG] test episode 69: reward = 225.22, steps = 1458\n",
      "01:46:58 [DEBUG] test episode 70: reward = 214.61, steps = 1585\n",
      "01:46:58 [DEBUG] test episode 71: reward = 198.05, steps = 1600\n",
      "01:46:59 [DEBUG] test episode 72: reward = 205.03, steps = 1600\n",
      "01:47:00 [DEBUG] test episode 73: reward = 197.68, steps = 1600\n",
      "01:47:01 [DEBUG] test episode 74: reward = 159.38, steps = 1600\n",
      "01:47:02 [DEBUG] test episode 75: reward = 195.35, steps = 1600\n",
      "01:47:03 [DEBUG] test episode 76: reward = 217.50, steps = 1552\n",
      "01:47:04 [DEBUG] test episode 77: reward = 181.22, steps = 1600\n",
      "01:47:04 [DEBUG] test episode 78: reward = 202.95, steps = 1600\n",
      "01:47:05 [DEBUG] test episode 79: reward = 202.03, steps = 1600\n",
      "01:47:06 [DEBUG] test episode 80: reward = 186.07, steps = 1600\n",
      "01:47:07 [DEBUG] test episode 81: reward = 219.71, steps = 1536\n",
      "01:47:08 [DEBUG] test episode 82: reward = 74.13, steps = 1436\n",
      "01:47:09 [DEBUG] test episode 83: reward = 209.32, steps = 1600\n",
      "01:47:09 [DEBUG] test episode 84: reward = 208.08, steps = 1600\n",
      "01:47:10 [DEBUG] test episode 85: reward = 193.19, steps = 1600\n",
      "01:47:11 [DEBUG] test episode 86: reward = 164.39, steps = 1600\n",
      "01:47:12 [DEBUG] test episode 87: reward = 202.00, steps = 1600\n",
      "01:47:13 [DEBUG] test episode 88: reward = 202.55, steps = 1600\n",
      "01:47:14 [DEBUG] test episode 89: reward = 225.22, steps = 1466\n",
      "01:47:15 [DEBUG] test episode 90: reward = 161.39, steps = 1600\n",
      "01:47:15 [DEBUG] test episode 91: reward = 193.27, steps = 1600\n",
      "01:47:16 [DEBUG] test episode 92: reward = 159.64, steps = 1600\n",
      "01:47:17 [DEBUG] test episode 93: reward = 200.44, steps = 1600\n",
      "01:47:18 [DEBUG] test episode 94: reward = 170.55, steps = 1600\n",
      "01:47:19 [DEBUG] test episode 95: reward = 216.36, steps = 1568\n",
      "01:47:20 [DEBUG] test episode 96: reward = 215.38, steps = 1581\n",
      "01:47:21 [DEBUG] test episode 97: reward = 213.69, steps = 1594\n",
      "01:47:22 [DEBUG] test episode 98: reward = 202.27, steps = 1600\n",
      "01:47:22 [DEBUG] test episode 99: reward = 164.51, steps = 1600\n",
      "01:47:22 [INFO] average episode reward = 191.97 ± 30.95\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAD4CAYAAAAEhuazAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAA2TklEQVR4nO2deZwcdZn/30/39JxJJplkck+YBJJAQjjHcClyyrmiqBh3VUAUV/Fm1wVXf/JScfF214OFFYFVVkQFQUEQEEGQIwECSUhCQs5JhmRyTSbJXN39/f3RVT3V3VXdXX1Md08/79crTNe3vlX1dDHzqaee7/N9vmKMQVEURakuAqU2QFEURRl5VPwVRVGqEBV/RVGUKkTFX1EUpQpR8VcURalCakptQLZMmjTJtLe3l9oMRVGUiuLFF1/cZYxpTW6vGPFvb29n2bJlpTZDURSlohCRzW7tGvZRFEWpQlT8FUVRqhAVf0VRlCpExV9RFKUKUfFXFEWpQlT8FUVRqhAVf0VRlCpExV9RFKWMeGnLXlZt7yn6dSpmkpeiKEo1cOlP/w7AppsuKup11PNXFEUZQYYiUXr7hzL2+/3L21i/s7dodqj4K4qijCCf/r+XWXTDnzP2+9yvl3PO958qmh0q/oqiKEUkEjUMhCPx7YdXvenaZ+ueQ67H7z04WBS7VPwVRVGKyJV3LGX+lx9OaXeun377Mxt527efYHXX/pR+P3lifVHsUvFXFEUpIk+93u3aHokOi/+GXQcBeH7D7pR+tTXFkWkVf0VRlBIQdoj/4a1jgOGHgJPG2mBRrq/iryiKUgKcnn99KCbFQxGT0q+htjgZ+Sr+iqIoI0DUEnuR2HbYIfT2c8De56Q26NJYAFT8FUVRRoChaBSAmkBMzMPRKKu297D34GB88PfVzn2px7m8DRQCneGrKIoyAgyGY+IfEAEMQxHDRf/1dEKfldtSs30GI9Gi2KOev6IoSgE5OBBOiOfb/PK5Lcz/8sMMWA+Bbfv6sjrfi5v3Ei7CAyBv8ReRNhF5QkRWi8gqEfms1d4iIo+KyDrr5wTHMdeLyHoRWSsi5+Vrg6IoSjkwFImy8KuP8NUHVqbs+/NriZO7/uU3r6Q918SmWgAefW1HQmZQoSiE5x8GrjXGHAWcDFwjIguA64DHjTFzgcetbax9S4CFwPnAT0WkOLlMiqIoI4gd2vndi9tS9rU01iZsb3RJ63TiHPytDxVeIvMWf2NMlzHmJetzL7AamAFcAtxpdbsTeJf1+RLgbmPMgDFmI7AeWJyvHYqiKOWCU7iD1gBvS1Oi+NsDv97nKE6Wj01BY/4i0g4cDzwPTDHGdEHsAQFMtrrNALY6Duu02tzOd7WILBORZd3d7rPkFEWpHowx/HXtzoTSCOVOyErVjCTZnCmUc1nHzKLZBAUUfxEZA/wO+JwxJnXI2tHVpc31LhhjbjXGdBhjOlpbWwthpqIoFcz9y7dzxe1Luev5LaU2JWtCwZjM3vtSaijIi3OOmsy1584vlklAgcRfRELEhP8uY8y9VvMOEZlm7Z8G7LTaO4E2x+Ezge2FsENRlNHN9p5Yhkzn3uwyZcqBQA7hm/pQkECGsFC+FCLbR4DbgNXGmO87dj0AXG59vhy439G+RETqRGQ2MBd4IV87FEWpHox7sKDkuFnV05d54ZZk7Hj/rJZGLjh6ap5WuVOISV6nAR8CVojIcqvtS8BNwD0ichWwBXgfgDFmlYjcA7xGLFPoGmNMJOWsiqIoSYgdNS5P7Y9zaDDCocEwtcH8/OunvnhmgSxKJW/xN8Y8jXscH+Bsj2NuBG7M99qKolQXRU6AyRvnQPR7bn6Wc46anKZ3adHyDoqiVBxl7vgDsLprP7MnNZbaDE+0vIOiKBWD7fiXY6rn3oODWa3NWy6o+CuKUjEUOuzTPxThnqVbC/Iwec1lCUbxjIiXHg37KIpScRTK8f/BY69zy5MbaG4Mcd7C
/LJqylfm3VHPX1GUisH2pAsV9OnePwBAb384/5NVmPqr+CuKUjGUc7ZPLpO5SomKv6IoFUehx3vX7ejlu4+szSv27yr9Zfw80Ji/oigVR6Fn+N7y1AYAPnb6HJobQjmdo5BVOEcim0k9f0VRKgZbYIuljfnod4VFfVT8FUWpHMpJX1/t3Medf98U33azbef+/hGzxy8q/oqiVBx/fLWrIOfJ5wXinT9+hq8+sCptn6Wb9uZxheKi4q8oSsVgh1Z2HRhwXSTdL6927sv7HDZFWGa3qKj4K4pSkeQr/j2HhnijO3Ed3XzCSoV4GI0kKv6KolQk4Wg0r+MHI6nH5yPfUZ+j0IdN9C76NhKPERV/RVEqBqdnnmkN3ExkWkA9W77359j8AD+ef1tLA0/+a/Fq9WeDir+iKBWDM5c+EvEv/gPhCAPh2NpRbssk5vI4+NFf1tN9YCBlgfZ8GImsJp3kpShKRZKL53/i1x9jIBxh3Y0XFjS2cuuTG/jZ0xuz7l8O1T5V/BVFqRicE6lyifkfGBgu4JbPLOHkGbh+hB/KY0JYQcI+IvJzEdkpIisdbTeIyDYRWW79u9Cx73oRWS8ia0XkvELYoCjK6Cch5p9D2MeJW5Qm2zPmm9hTBtpfsJj/HcD5Lu0/MMYcZ/17CEBEFgBLgIXWMT8VkWCB7FAUpUooZWplvplGmaiYbB9jzFPAniy7XwLcbYwZMMZsBNYDiwthh6IooxxHvCRfAXYT2GzHbJPfOvwmDhWyCFyuFDvb51Mi8qoVFppgtc0Atjr6dFptKYjI1SKyTESWdXd3F9lURVHKnUKmeuZTOTP52nU1/oIXpZf+4or/zcDhwHFAF/A9q93te7v+XzDG3GqM6TDGdLS2thbFSEVRKpO8Y/5ZN6aSHHLyPWegDNS/aOJvjNlhjIkYY6LA/zAc2ukE2hxdZwLbi2WHoiijB2e0pBgx/2wzgJJDTsFgGai5T4om/iIyzbH5bsDOBHoAWCIidSIyG5gLvFAsOxRFGT048+Pzjvn7fHYs37qP9useZOmmPby2fX/CPjfP/8rT2j3Pldz7nKOm+DOmABQq1fNXwLPAfBHpFJGrgG+LyAoReRU4E/g8gDFmFXAP8BrwMHCNMSZSCDsURake8g/7pB6f7oHw/IbdANzy5AauuH1pwr6gi/ift3Cq57mSB3w/+rbZycYVnYJM8jLGfMCl+bY0/W8EbizEtRVFqR4KGvbxeXhjXUwuH1u9I2VfTSDVj04XCEreV4qgkdb2URSlYnCK5FBRYv7ePPfGbs99bp6/W+0gL0qR+qniryhKWfGOHzzJPUu3uu5L9PwLn+efjgdXeK8e5qbd6bQ/uX/K8SPwLFDxVxSlrHh9xwG++LtXM/YrSnmHHHP/3WxJ580nF3bTsI+iKEoanKKZb8zfdcA3x3O52ZI25p/J8x8BVPwVRalIihHzzxW3Wv7+4vhJfUfgq6n4K4pSMTi99bxj/q5hn9zOFXZZEtLPpN9SeP5az19RlIrBKc4ueps1v3uxk+88sjZ/gyzc6gylW7Al+a2gFDF/FX9FUSoGp8TmU5jt2t+84nH+3M7pGvNPl+2T0ldTPRVFqWIyCbpzdzHC4j2HhgD44WOvc+tTb2RtVzhqmDSmLqEt4EPQNdtHUZSqJpMzn+CZF0H9z/3BUwD88LF1fPOhNazf2Uv7dQ/y1LpdaY+LRA0nHjae6c318TaXSb9xNNtHURTFB4mef/FTYp7fGFuj6k9pJnhBTPyTZ/mmj/knbSf1HYnvpuKvKErZkEnyEmP+xbQkhh3Kz8YzD4gkxO79ePPq+SuKUtWki62v3NbDS5v3DvcdUXsS1fnTZx3BtefOS2hL9fy9SfdWMFJoto+iKGVDOkG/+EdPj5gdNlHL9U/2zI+YPIbOvX0JbcHk9E0XfQ8GhEjUaMxfURQlV0Yi7LOjdwBI9eJrgy4lnLNQ8LqagOv5Umv9FP9poOKvKErZ4EfQR2JQ9Oa/xtI9
k9M2Qy7in9qUKuC2+Kf0VM9fUZRqxo+gj4Tnb5MszqGaQEpbMCAJbW6C3hAKuu5M7qvZPoqiVBX+PP+RI5uwT/LbgZszP64h5LqvFAPAhVrD9+cislNEVjraWkTkURFZZ/2c4Nh3vYisF5G1InJeIWxQFKXKGEHXP7lqZ22NpAi222peyUxorAVG1ySvO4Dzk9quAx43xswFHre2EZEFwBJgoXXMT0UkWCA7FEWpEgol/dmI9sBQYhW5UDA17BPL8x/edhsAntAUcj1/xZZ3MMY8BexJar4EuNP6fCfwLkf73caYAWPMRmA9sLgQdiiKUtn4CvsUSP3rPQZhnQyEU8U/mYAkvg24CXpzQ63nvpGmmHn+U4wxXQDGmC4RmWy1zwCec/TrtNpSEJGrgasBZs2aVURTFUUpB9wGOl/espdlm/am9i2Q+teHghwcjKTtMxBO3F9bE0gR8OTngVsop9mO+WcxJ6DYlGKSl9vXdP2/aIy5FbgVoKOjo3yW7VEUpSi46fm7f/r3ol6zPpQ56jyY5PnXuoV9MoSPfn31yTy7YbfH3qTaPhW+ktcOEZkGYP3cabV3Am2OfjOB7UW0Q1GUUUih9LEulFkGk9dnd83zT8n2Sdw+ac5Ez6yeSh7wdeMB4HLr8+XA/Y72JSJSJyKzgbnAC0W0Q1GUCsGPoBcu5p/Z808OMYWCqdk+NUlvA74Ku2XftWAUJOwjIr8CzgAmiUgn8FXgJuAeEbkK2AK8D8AYs0pE7gFeA8LANcaY9AE3RVGqAj9x/EJ5/vVZeP7RZPF3meQVyiJryOuBUIqVvAoi/saYD3jsOtuj/43AjYW4tqIoowd/nn/hBny9OGZmM+GIIXmteLdJXjUubcl4SXzFpnoqiqJUKl71diAmyiIunr+b+AcSA0Fuzry3559dv0KiJZ0VRSkbSpLnny7bR4SApF4rVscnOeafvJiLnzV8R1e2j6Ioij98VvV8tXMf//PUhrwumU783Tz/x77wdte+WYV9rAdCSm2fKsnzVxRFccVvVc93/vgZAD52+pycr5luwFck5pU7a/vYQp2s18kDvun0vBwmLannryhKVVOXIdVTGF7LN6E9Sd1rgoGMMX/Pa4yyPH9FURRf5FPS+RfPbeajdy7zfc1swj5Ow7x0OhSswlRPRVGUQpDPJK+v/H6le8cM1KYRbZHYUOyQiSa0QepDIHUB99Tz2m2p9fxHHhV/RVHKBn+TvAoUOU/jddt7nHn+dltKtk8gkKDi6VI9ky1PWclrBAYFVPwVRalInAK59+BgzudJNzHXHvCNugz4JpMc9vHjzVfsSl6KoiiFIFeH9/ivP5rzNZOXX3QiCLjk+UPmAV/38yX+9DrXSKDiryhK2eBvkldhYiOZSvLEsn2cA77ucfuU2j5+ZvimN6EoqPgrilI2+M3zLwRpM20k9maQTdgnedZvulBOiunq+SuKUs3c9NCahO3kFbTAe9A0V9KHfWLXc33QpJR3CKTbbZ3Po55/UvtIhIFU/BVFKRvufXlbwvaB/nBKH1sXb3t6Y0GumWnAN9nzT7bDxk+ef6aYv9b2URSlYujtH+InT6wn4jYdNkcODLiIv6WUPX1DBblGpgHfWG0f5/Xd+9YEkjx/HzZozF9RlIrlmw+t4TuPrOWRVW8W7Jy9Lp5/oUkb8rf2OR9o8UleyYu5BJNLOvuo6lmCdB8Vf0VRCsJBy0tPXuw8H9w8/0KTSXgDIgmZRcPpmonHZaoRlO5a6vkrilKxFMN5PTSYKv6FSvG0yTjJSxIHl72+Z1NdMHENX7fzpbnOSFP0Gb4isgnoBSJA2BjTISItwK+BdmATcJkxZm+xbVEUpfzYvPsgBwbCLJzenLJvMJwq9IUeC02uyeNEEAIiruMYyYLdVFeT8DbgbwH30Rv2OdMYc5wxpsPavg543BgzF3jc2lYUpYLJ1SF/+3f+ykX/9bTrvnDy4rnWdQrpKacL+8TKOyQN+HpM8kpeDtK1sJun
65+4WbC6RWkoVdjnEuBO6/OdwLtKZIeiKAWmkMIcjriLYCH95IwlGZJj/nmUZS6nsM9IiL8B/iwiL4rI1VbbFGNMF4D1c7LbgSJytYgsE5Fl3d3dI2CqoijlRNgjbbSQ2TFpUz3FTvV0GfDNtpBPFozWks6nGWO2i8hk4FERWZPxCAtjzK3ArQAdHR3lsPKZoigjSDjinjlUSLHMrraPW3v6sI57SWePbJ/RmOppjNlu/dwJ3AcsBnaIyDQA6+fOYtuhKEpxKYZ3NlTACWNeZCrvkDLDNw+drprCbiLSJCJj7c/AO4CVwAPA5Va3y4H7i2mHoiiViafnX9AB3/T7kmv7xD3+LN4YsmnLZEOxKLbnPwV4WkReAV4AHjTGPAzcBJwrIuuAc61tRVEqiN+92Elvf2FKLHjhVSqikKmRfj1/r/o8Kcf6meFbAt+/qDF/Y8wG4FiX9t3A2cW8tqIoxWNFZw/X/uYVLlozjZ/80wlFu86QR7ZPIbUykMYFFivX023A17N/un6eMf/0NhYDneGrKIpv7Jm3O3v7i3qdkRnwTX82zwHfHBQ73RF/ufbtXHlau+9z5oqKv6IovrG1sNjhCq8B35HylO2wT2Kef+okr4ZQal0fXzN8Bea0juGk2S05WuofFX9FUXxjhtW/qHh7/iOV509qSWeXfo21wZR9fmb4jubyDoqiKHH6hyJEs0jj9BzwLaBWpqvtg0ttn/iAr+Ow+iw9f8+VvDTmryhKpeKn2uaRX3mY6+9dkbGf14DvSE3ysmv7eO2z+ee3z8nqWtnm+etKXoqilCV24THXXPYMbqz9kPj1sq0Zr+NW2K3QpC3sFv+Psy2x4aJF0/jQKe0FsmHkXgFU/BVF8Y/lmeYSrvDj1Xp6/mku7Lfef6avkDImEM/zz6G8Q442FAMVf0VRRhQ/0hyJRlMWRr/0+BlpxdJvyCTjgK9Lm/On27505/PTXkxU/BVF8Y1bqme2mhv1oc7hiElYHnFMXQ3fvHRRWlfZb7g87SQva8A3sS07/GTwjMrCboqijD7MSIV9ooZaxyIpHz99DvWhYAbP35/8Z5PqmS2ZVvIqRUqnFyr+iqIUhGxlzY/nnxz2GQ65pIn5Z332zOeKiX+S529P8spiZnB2jaVhJOr5K4qi5EQ0CjWOuMyw8Hof4z/mn3y8s46PpFwrl7ycyzpmZnXMGfNbueiYaVx/wZE+zp4bKv6Kovgmnuopzrbs8OX5G+Pu+WdhW7Ykh32SzfMa8PXsEO83vONb7zkmK1vqQ0F+8o/FK5TnRMM+iqL4Jh7zd0t3zPLYbIhGDTXBYZkKZBly8UPyqRLMk8yF37zOlVDqoQh254uKv6IoI4ofzz9qDDWOuEymJRch/1RPk1S+OTXsE2vIZRJuPGRUBs8AFX9FUXwTT/XMJdvHR9+IgZDD87eFt5glnZ32ibikenqMASRTyHGJYqDiryhKYchS0IyPig3GGGpcs33SHZP9+cFtwHf4czYPmYSHhfOzi5Hl4PHbqPgrilJQMgmcv1RPQ9C5OlYWNXC8Bny98v+TRdp5vN88/0ydVfwBETlfRNaKyHoRua5UdiiK4p90E6kyabvXbrdzRqKGgEvMPxfP36s9ZcA3qZ+fsM+7j5vubViac5SCkoi/iASBnwAXAAuAD4jIglLYoiiKf4Zj/v5VzPb8kw91K91vDImef9LPdLZlS6YF3FMzO737X35qe9prxQeLqzjmvxhYb4zZYIwZBO4GLimRLYqi+CWe6pnDoR7C5xYOihiTUHvHfgtI7/l7hH08+qeN+YskvHk4r+12nYyzfsvA47cplfjPAJzFvDuttgRE5GoRWSYiy7q7u0fMOEVRcieTwHmJs6v4R02CZ27P9k3nffsJK4Fbtk9SqmdS/9RJX/4VvRweAqUSf7evnvJ/xhhzqzGmwxjT0draOgJmKYqSiRWdPVx5x1KgsKmebuu2GGMSllm0M39yivl79E87w9elto+bjZVIqcS/
E2hzbM8EtpfIFkVRfPD8xt3xz7k4sPGYv0e7k4hJ9vyzmeXl0ZztgK9zn1ttnzzcdvvYcnhelEr8lwJzRWS2iNQCS4AHSmSLoig+8BK/bGvqOIWvu3cg/tlN/KPRRM/cLvVQSPFMN8MXChv2KYNoT5ySFHYzxoRF5FPAI0AQ+LkxZlUpbFEUxR/51u6xRT5q4C03PuZod+/rdPZtzz/dXAHPPH+P9rSev0ttn0LE68sh5l+yqp7GmIeAh0p1fUVRcsMpxm5ebyan3DsP333A1xnztz8PRbynCfvO80/Tz622Tz6Ug+jb6AxfRVF84RR818yNDK6/1+6Ii+sfNYmTvOzyzn1DEe/zp716KimCbBL3eS3mEt/2c60yCvyo+CuK4ot8vVev8It72Ccx7BK0Uj37h9J5/l7lHfzbJwWW6+H1CEr/EFDxVxTFF4nFy1L3Z6rd4yby4C7aUWNw1HUjlEW2j2eev+ce71RP37V9siSYTdZSkVHxVxTFF4lhD5eYf6baPp6TvFLbkid5ZSOafjOB0i7mgr/FXDJey/rprFRaKlT8FUXxReYZvOn3e3n+EddUz8SYfzai6V3V071/uvkGIoVNz7Tvnf0Q+9olCzl9XmkmsOoavoqi+MIrXm1rZmbH271HJOIW9kks7OZczN3n6XNcYzi1tk/8fHnMNbC/x4dPaefDp7TnfqI8UM9fURRfJKZ6pu7PlO3j5fmHXeo7JBd2yyrs49WebT3/pJh/JvxEhexzB8tAecvABEVRKgmvRcptMjnEXgPC7jN8E2P+oSxU0+vZ09sfdm1PG/ahsAO+YevJl9UbTJEpvQWKolQUGcsZZBzwdW93m7cVNe6TvNJf3v0CV/9imWt7unUF3Gb45oM9l0EHfBVFqTi8Uj2HY/6Zwj4eMX+XeFBqSefcRXPltv1Z9XtybWL5+GABxd/2/DXVU1GUisMpxm6Dv5lTPd3b3R4KxiQXditCqmfSd/jSfSsS9nkN+OZCxBrXyOchVihU/BVF8UWeUR9PcQ67ef4phd2yiPm7tP32xc6E7Zam2ozngdh39RLqbKuYOhmK2J5/6aW39BYoijIqsB8KmWb4eolmxC3bJ+q+mEva87tc/19+80rC9lHTxma0B2Ihrkyevx8f3g5thTTmryhKpeGVChmP+ec6yct1Ja+Y+NredzbhkmzDPrNaGjP2F5GChmjst5tCDiLnioq/oii+SJ4Bm0zmsI+PAV8r7HP8rPHW9Qojmm4rdHnhNeCbyySvaDzVs/TirzN8FUXxRUbRy3GSl2uevzEERfjZh9/CS1v20twQyt8+nxQyM2fIHvAtg1lepbdAUZSKIpIwCcr/Yi5ePZIHfI0xGBPz9psbQ5x55OSs7MtlINYLEW/xz+UlxC5hUQ6ev4q/oii+SPDQnTF/S3RzjflHk3ZEcsyJz8bzTxirSNcP8bx+Lm8YVZHnLyI3iMg2EVlu/bvQse96EVkvImtF5Lxi2aAoSuHxEm+bXFfySvb87U3f4p9lP/us6exN5/kP98nevnAZ5fkXO+b/A2PMd50NIrIAWAIsBKYDj4nIPGOM97psiqKUDRnFPcPx2c7wtfv5Da9kss8mG9EWCuulxz3/Kk31vAS42xgzYIzZCKwHFpfADkVRcsAZnnFfwzf98dmu4Tto5X7WlnBwVKSw2T6HtTQB0D6xKR+zCkKxPf9PiciHgWXAtcaYvcAM4DlHn06rLQURuRq4GmDWrFlFNlVRlGxILHzmf8DXM9UzqT0csSdE+RN/v5qcqX8hvfQPLG7j8NYmFs9uKdg5cyWvR6qIPCYiK13+XQLcDBwOHAd0Ad+zD3M5lev9N8bcaozpMMZ0tLaWZrUbRVFgx/5+TvmPx1n7Zm/mGby5pnom7RiyPH/f4p+l+mcj6SKSV2G3n1/RwX8uOS7hfCfNmViw+Qr5kJfnb4w5J5t+IvI/wB+tzU6gzbF7JrA9HzsURSku9728ja6efu5euoXpzQ3xdjen
OGPYx7O8Q1LYJ2znxPsVyszq7xTftDN8yW9w9qwjp+R8bLEpZrbPNMfmu4GV1ucHgCUiUicis4G5wAvFskNRlPzZvPsgECuJYHv+rWPrXGU2c0ln9/Zk8R/KMeafVapn/D+ZOxaytk85UcyY/7dF5Dhij+FNwMcBjDGrROQe4DUgDFyjmT6KUt7s2D8AxGam9g8Npys6hTbb2j5eYaHkVM9wdGRi/umOEApb26ecKJr4G2M+lGbfjcCNxbq2oiiFZd+hQQAGhiJxzz8g4l6DP8O5PLN9knZs3XMI8F8Bs5Axf/D2/AtcRWLE0Rm+iqJ4YozhhgdW8dKWfQAMhKNxzz0YEPewT44x/+QB36vujC276N/zzybmD5eeMBOASWPq0vbLOOBboS8GKv6KomCMYeW2npT2vqEId/x9U3x7IByNx+xrAuLq/maM+buUbgb3qp6Qf7bP8xt2u/b75BmHs+br5zO+0XthFyGL8ssV+gqg4q8oCrc9vZGLf/Q0yzbtSWi/Z+nWhO2BsCPsE0gM+9ifMnv+7niLv/+wz6ZdBznlPx6nq6ePXz6/xbWfiFAfCqY9VxlkZBYNFX9FUbh/eSzb+uDgcO7F6q793PCH1wDiSyne9dyWuOcfkBxr2mc54Gvjt/yxwfDL5zbT1dPPH1/pcr1etpqeVd3/Cn1AqPgrikJ3byybZyg8HJM55HgQfO+yYwE4MBCmc+8hAhITRrcQT66F3Q4Nhl3b8ynvYDB5hWXU81cUZVRje90Prujixc2x0M/yrfvi++tqhsMjXfv6CUjMI3YKuZ2dk2vVzx/9Zb1re6gmVYHf39HGO4+d7nH+RNHONCM5V7ItIFeuqPgrihJfPP2+l7fxnpufBeDrf3wtvr8+NCwVz27YTThqEJG40O87NMiaN3uB3GP+XrgN+H7rvcfwzUsXZXW8a9gnyaX3ervIbh5YZb4eqPgriuI52GpTXxNMmewU24od944fPBVvzzzD15/8e6Vaeknudfe+mjDhLNObCMB915zqvkOyX+u30lDxVxQlRfztMQCbulCA179xQULmTSAw7OXvdPTPtaSzF3aZh2S8UjBXbttPb//w+EE+4RkBxtSNzqXOR+e3UhQla4wx9IcTBfbk/3g8YbuuJkggIDQ3hNh1IDbbV8hthm+2nv9ZR06mo30CR0we47o/nUe+++Cg43oux2ZlQYzDyqD2fjFQz19Rqpj1O3uZff1DKZ5/8rbtZY+tD8XbxH2OV0bXPltHfFpzPZ8844icyh/vPhh7EzEUZmB20YzmvM9Rbqj4K0qV0tXTxznfH47Vz5vi7mHDsLc+tn44WCAMC/nYuhquPK3d+4HgIJvyC5B5Zm+6mbe7rbeTbGP+mZ4P9qV++P7jho/JfNqyRsVfUUY5W3Yf4ul1u+gbjLB00x6MMQyGo/y/+1cBcOkJM3jkc6fz0GfeFj/m2JkxT/frlyzkC+fOY+H0cUCS+MtwbZ++oQgNoWDCA8GL7fv6s7I7UzXNdC8EPX1DAHzr4TUFSfW0L3XYxEZfdpQzGvNXlDImHIkSDEhC6GP7vj5e2LiHqc31nDS7JWHfY6/t4OWte/nM2XOpDQb47p/X8pMn3gBgenM923v6ufSEGby4eS+bdx9iVksj333vsfHKlfOnjGXtjl4+ccbhHDF5bEq8/W1zW3lmfaxWTizP3zAUiRKOmpj4i/vEL5tXtu7jO4+szeq7Z5rZm05znWGr5MFryF6w4/0qVeHToOKvjGoODIT58n0rmDyuni9deFTJ7IhGDXc9v5l3HjuD5saQa5+n1+3i6Bnj4oXGunr6uPA//8alJ8zkKxcvAGD9zgOc98OnEsRtTmsTAREODoTp6ol51ZEo3P7MRgYcA7nbrX33vrSNgMDEplq+/d5jEkoW215yXU3QdaD146fP4aY/rQGGwz79Q7GZwA21QQLiHWb5xbOb+Ir1tpENtRlq+qQL+xwYGM722bDrYPxzMCAZ01rdsK9U6aEe
J6Ne/DfuOsjEMbWMq3f/g1PKj/6hSMaCW06MMfQPRVm+dR8nz2lh76Ehtu/r46EVXdy9dCt7rMyPc46awhvdBzhsYiPHzBwfT+GLea+G/f1DTGyqjXmvxnBgIExdTZDuAwM8ubabixZNo28owkA4wpRx9QyEozQ3uP9e7Ts0SE/fEBPH1LGh+wDv/PEzAHzl/lXc8qETOfeoKXQfGGDy2DoGwlG+88habnt6IwBfuvBIVmzbzx9eidXbue3pjRwzs5ndBwb5mjXx6piZzbzaGavCuaF7WNwWTBvH9p4+/vvJN+L97rxyMT9/ZiP1oSCfePvhbNvXRyAgzBg/vByjTSQu/h6TnkT42xfPZPu+Pr7zyFoMhj5L/OtCwVjJB0shl27aw/tveZazjpxC1Bj+smZnNv8742T0/LN0xgcdD8CgCJEcJNy+lnPw+KJF03hk5Zt84dx5vs9XDoxq8R8MR7ni9hcYGIpywaKpfPz0w5naXF9qszLS0zfEmq79TGiqZXxDCBFh+dZ9HDl1LG0twzHHSNQQNYZwxBAMCEORKHU1AcJRw8ZdBzly6li29/QzZWxdrPa6ybwkHcR+wcXyJDv39jFvypiE0EI0atiw6yCRqGH2pCZqawI8vLKLYCDApl0HqQsFeM8JM4kaQ1dPP5PG1DG+IZRy7XU7ernlqQ0c6A+zZHEbX/79Sjr39gFw5NSxTB/fwEA4wsXHTGfu5DHc8fdN9A9FGddQw8lzJjKuvoafP7OJFzYOV6JcNKOZFUmliee0NrGh+yCX3fJsvO2Ymc10HNbCqu09PL8xsZIlwMwJDXFbbL5034qUfpeeMIPG2iDLNu2lc28frWPriBrDzv0DcVFM5uO/eJG6mkCCV+7kmw/FPOsZ4xvYti9mw2fvXh7fP6auhns/cSpb9/Zx5nf/CsD3LzuWcfUhzj5qMqu27+fiHz0NwC8+chLNjSGufcf8+PHO36FkbG2rS/PwbWtppK2lEZFYeeYBa2WvhlAQrFDQfz/5RvwN4bHVOxKOP3fBFB59bUfKeZPJtHZvLllAgQAQAb/V2Nx6N9XVcNsVb/FtQ7kwqsW/tibAx08/nC/dt4Lbn9nEis4ePn32XMbW1/CV36/k6OnNnHf0FM6cPxmI1Sq3Pc5INCaoEBO7wUhs30A4wh3PbOKXz2+mfWITi2Y0c+GiadSHAtz0pzU01NbwkdPa6ekb4oz5k+k5NMS4hpqEX9T7l2/jyde7OWHWBA4OhFm6aQ+RqKG5IcT67gPs6h3kzf3ug2KnzJlIy5haHny1K6G9sTaYUIgLYOq4et7cHxPf+lCAnb0DLJw+jitObY95un1DbNh1gL+t28XC6eN4ZWsP4WiUvsEI4WhsUNCu+TK2roa3zG5hIBzhla09Ca/Vh7c28YbD+wTig4lOZoxv4LQjJrJ+54H44iA2D696M/75/R1tvL6zN+4p2jFmJ/e+tA2AyWPrGN8YonVMHet2HogL/6yWRv71vPk01gZZNLOZH/9lPQcGwvz+5W1EDbza2cOrnT0Jnrt9vwAODqQWGZszqQkROK5tAtv2HeK5DXviIZTDJjbxruOns27HAZrqajjtiEn8Yfl2eq3zfP6ceRw9YxzPb9xDb3+Y/f1DBEV4wPLuG0JB+oYifPUfFnDq4ZN4fUcvb5s7iT0HBznre08CsHD6OOpDQX71sZOpCQaYPamJ//vYSYQjhtPntcbtPHpGM69/4wK27+vzDDF5Ec3g+TuxC7vZE6oaa2MDvrc8tSGxn1UDqCYgLP33c2iqq2Hel/+U8fyhQOHzUWaMb0j5Xc0G++83m7HjBz51mus4Q7mRl/iLyPuAG4CjgMXGmGWOfdcDVxF7zn7GGPOI1X4icAfQADwEfNYUsULSxcdOi3tsyzbv5fKfD68Vv2r7fn69bCsTGkPsPTREXU2AY9vG89r2/QyGo0wfX8/ug4MMhqMMhKO0jq2L/0+dMb6Bv63bxd/W7eKnf30j4Zr263pt
MMCgY3ZiMCCEghJfA9UWsGSOaxtPR/sEtu45xJv7+9mxf4DjZ42nuSHEX9d2x/tduGgqU8c1sL9/iNqaQFxABoaiPLyyK16Gd9eB4V/El7fs4+UtyxOuN2/KGB5aMSy+R88Yx8mzJ1JbE6AmGOD2ZzZyyMoUmTOpifMWTuW4WeN5/c1efvHcZt7oPsicSU1MH9/AFae288m7Xop/73csmMKUcfX84rnNbNvXxz3LOlO+r+2tX3FqOze8c2HCvhWdPfx++Taeer2bk+a0cFlHGwumjeO3L3aybPNerj59DvOmjAViD+nnNu6mfWLMFidfu+RoAL5/2XH09A3xt3XdHD29OZ698dKWvRzfNoG+oQgHB8K0jq1DRFi1vYeZExpdwzvrdx5gMBzlqGljXb3QL543nze6D3DiYS3xtrOPmpLQ5/uXHctQxNBQm+hpz58a+07jG2v57T+fwvKt+7jytNlxh8Tm1MMnpVwXYo5P+yT/k5Ns8XfW8vGiJijsPTTEp3/1EhB74Dr/kO23m7uuir19HN46Jm04b3xjiMMmNvGKVVAuk+fvhyOnjuUzZ8+lt3+If/td6htcMm9pn8DSTXvj27Yl2QwXHDNzfG5GjjD5ev4rgUuBW5yNIrIAWAIsBKYDj4nIPGuh9puBq4HniIn/+UBmNyBHxtWHeOwLp/PEmm5ufGh1wr4X/v1sPnzbC/GCVDUBIRyJcsb8VoIB4eBAhIU1ATbsOsjqrv001QbpOHoq739LG8e1jee4rz2acL4PnjyL+pogT6/fxZo3exOEH2JvE5GoYdKYOh78zFv504ouZk5o5P/dv5I7PrKYqc31dO3rTwmzJMfAe/uHEibbuGGX4B2KROnuHWBacz17Dw3xrT+t4dfLtvLx0+dwzoIptDTVcnjrGPoGIxhiYZrZE5sSQjSfOesIDO5511/9hwWISIIorf3G+ew+OMgDy7dzxantBALCte+YF4+xf/n3K5kxvoEp4+qZO2UMx8+a4Pk9Fs1sZtHM1Ak2SxbPYsniWQltgYB4iqGT5oYQFx+TWBHSFuimuhqaHNP5F073ntzjNfPUZnxjbYLwu1ETDFCTYXijo72Fjvb05ykU9ipbdZmMIhZ++vsbw29lsyc1xePr33nvMbyvo42d+/uZPM471Hrm/FaesByafzppFh3tLVx5+1LAfx3/dBgDFy6axu9etJ0PbxV/+7xWjp3ZzNJNe+NF20Zhsk9+4m+MWQ2usbdLgLuNMQPARhFZDywWkU3AOGPMs9Zx/wu8iyKKP8ARk8dyeGvsD9V+AEwaU8vksfU8/LnTgZjXmC4e/mZPP61W7Nzm2evPYvu+Po5rm8DGXQeY1RKLfwP8Zc0OfvDoOk48bAL7Dg0yvrGWDbsOctVbZzN/ylimjKvnitNmA3DOgmFvcNzUVFFP9pYyCb+TUDAQ94Jbmmr51nuP4VvvPSaln+152vfJSbo/Qrd9IsKkMXV85K2z423OpfJuek/q9ZXywLlKVybGOHL+39/RlvDQtMfW0gk/wO1XLmZ1V2yM4rKONg6b2MTi2S28sHEPoSxsyBZ7IDtk/X16LRwDxBMEYHhCmv0QqPQyzk6KFfOfQcyzt+m02oasz8ntrojI1cTeEpg1a5ZXt6wQET52+hzev7iNY274MyfNmZiwP9Mvu9tA8bTmBqY1x4T1iMljE/addeQUzjpySsoxilLOtDTV0tXTn5Xw2hl0Y+tqUhyKGh/x+qOmjeONb14Y37ZTMf2u3ZsOe3F4O300HPEW8e37+uIPi/hEMzvbp2AWlZ6M4i8ijwFTXXb9uzHmfq/DXNpMmnZXjDG3ArcCdHR0FOS+j6sP8eBn3srsHOKhijLa+dnlHTy+emdGjx2GB4WPPyw1bFfrsgBLtoStcGkhY/5xz996oHhVCgW47C1t8TeDYNJDbBQ5/pnF3xhzTg7n7QTaHNszge1W+0yX9hElXRxXUaqZac0NfPDkw7Lqu8PKjLrg6FTfMB+v
PVwEz99+m7DDsl7if+TUsfzb+UfyDWs+hW1CIO75jx71L1ZtnweAJSJSJyKzgbnAC8aYLqBXRE6W2EDBhwGvtwdFUcqYKVYodMG0cSn78hFuW6gz1fbxw7Ft44Fhu7xi/vaYXrLnL6Mw7pNvque7gR8BrcCDIrLcGHOeMWaViNwDvAaEgWusTB+ATzCc6vknijzYqyhKcfj8OfNimTGWsDoJ5RGysb3yQnn+919zWjwdeDjsk17FvR5Ao0j78872uQ+4z2PfjcCNLu3LgKPzua6iKKWnPhT0TK0tRNjHb8z/o2+dzc+sEhk2s1oaEx5OdRnCPjb2GIH9JjBc3sGXSWWNlnRWFKXg5CP+dnG6SWPqfB03xyVN+Y+feaurXeFM4h9J9PxFY/6KoiiZyUf87YlimSbROfnX8+a7jhEkF3S0w1HJYZ9kj3445m+JfzzPP2uTyp5RXdtHUZTSkCnmf/uVb2H9jgOu++766Ems7trv6wFyzZlHsHXPoYS2J/7lDBe70od9bA8/Yk11Tgn7ZG1R+aOev6IoBSeTcJ85fzIfO32O677TjpjER9/mvi8dbS2NbLrpovi221yeugwzfG3PPtnzH94/euRfPX9FUQpOIXP0/fL8l8723JfNJC9wZvtYqZ6jsLiPir+iKAUnn1RPPxwxeQz/kFSkb0qa2ckhj2wfu2rr0dYE0EhKzD/G6PH7VfwVRSkCI+UpP/aFt/vqXxvP9kmU8VkTG7n3k6fGJ6wl5/m7reRV6WjMX1GUqsF+I/nQKaklLE6YNSFeQTce87f6H98Wq180dVzq0peVinr+iqJUDSLC+hsvSBnITSYe9rFc/k+fdQTnHz01vsjOaEA9f0VRCsY/nZRf6fWRoCYYyBiWClupnnbYJxCQUSX8oOKvKEoBufHdixLSLSuV5AHf0YiKv6IoShIq/oqiKFWIir+iKEoVEk6a5DUaGb3fTFEUJUfsMhChPJajLHc01VNRFCWJH//jCdyzbCvzp4yuDB8nKv6KoihJTB/fwOfOmVdqM4qKhn0URVGqkLzEX0TeJyKrRCQqIh2O9nYR6ROR5da//3bsO1FEVojIehH5LxmN5fIURVHKnHw9/5XApcBTLvveMMYcZ/37Z0f7zcDVwFzr3/l52qAoiqL4JC/xN8asNsaszba/iEwDxhljnjWx8nj/C7wrHxsURVEU/xQz5j9bRF4WkSdF5G1W2wyg09Gn02pzRUSuFpFlIrKsu7u7iKYqiqJUFxmzfUTkMWCqy65/N8bc73FYFzDLGLNbRE4Efi8iCxleE8GJZ4FsY8ytwK0AHR0do6eQtqIoSonJKP7GmHP8ntQYMwAMWJ9fFJE3gHnEPP2Zjq4zge1+z68oiqLkR1HCPiLSKiJB6/McYgO7G4wxXUCviJxsZfl8GPB6e1AURVGKhOSzLJmIvBv4EdAK7AOWG2POE5H3AF8DwkAE+Kox5g/WMR3AHUAD8Cfg0yYLI0SkG9ico6mTgF05HltN6H3KjN6j7ND7lB0jcZ8OM8a0JjfmJf6VgogsM8Z0ZO5Z3eh9yozeo+zQ+5QdpbxPOsNXURSlClHxVxRFqUKqRfxvLbUBFYLep8zoPcoOvU/ZUbL7VBUxf0VRFCWRavH8FUVRFAcq/oqiKFXIqBZ/ETlfRNZa5aOvK7U9pURE2kTkCRFZbZXh/qzV3iIij4rIOuvnBMcx11v3bq2InFc660cWEQladan+aG3rPUpCRMaLyG9FZI31O3WK3qdUROTz1t/bShH5lYjUl819MsaMyn9AEHgDmAPUAq8AC0ptVwnvxzTgBOvzWOB1YAHwbeA6q/064FvW5wXWPasDZlv3Mljq7zFC9+oLwP8Bf7S29R6l3qM7gY9an2uB8XqfUu7RDGAj0GBt3wNcUS73aTR7/ouB9caYDcaYQeBu4JIS21QyjDFdxpiXrM+9wGpiv5yXEPtDxvr5LuvzJcDdxpgBY8xGYD2xezqqEZGZwEXAzxzNeo8c
iMg44HTgNgBjzKAxZh96n9yoARpEpAZoJFbLrCzu02gW/xnAVsd22vLR1YSItAPHA88DU0ys5hLWz8lWt2q9fz8EvghEHW16jxKZA3QDt1vhsZ+JSBN6nxIwxmwDvgtsIVbpuMcY82fK5D6NZvH3VT66WhCRMcDvgM8ZY/an6+rSNqrvn4hcDOw0xryY7SEubaP6HlnUACcANxtjjgcOEgtfeFGV98mK5V9CLIQzHWgSkQ+mO8SlrWj3aTSLfyfQ5tiu+vLRIhIiJvx3GWPutZp3WCus2Sut7bTaq/H+nQa8U0Q2EQsTniUiv0TvUTKdQKcx5nlr+7fEHgZ6nxI5B9hojOk2xgwB9wKnUib3aTSL/1JgrojMFpFaYAnwQIltKhlWCe3bgNXGmO87dj0AXG59vpzhEtsPAEtEpE5EZhMry/3CSNlbCowx1xtjZhpj2on9vvzFGPNB9B4lYIx5E9gqIvOtprOB19D7lMwW4GQRabT+/s4mNtZWFvcp42IulYoxJiwinwIeIZb583NjzKoSm1VKTgM+BKwQkeVW25eAm4B7ROQqYr+s7wMwxqwSkXuI/VGHgWuMMZERt7o80HuUyqeBuyzHagNwJTFnUu+ThTHmeRH5LfASse/9MrFyDmMog/uk5R0URVGqkNEc9lEURVE8UPFXFEWpQlT8FUVRqhAVf0VRlCpExV9RFKUKUfFXFEWpQlT8FUVRqpD/DzGTS3SPFm5tAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Train the agent until its recent evaluation rewards are high enough, plot\n",
    "# the evaluation curve, then score the final agent over a fixed number of\n",
    "# test episodes.\n",
    "REWARD_WINDOW = 10  # number of most recent evaluations that are averaged\n",
    "REWARD_GOAL = 200.  # training stops once the windowed mean exceeds this\n",
    "TEST_EPISODES = 100  # number of episodes played in the final test\n",
    "\n",
    "logging.info('==== train & evaluate ====')\n",
    "train_episode_rewards = []\n",
    "for generation in itertools.count():\n",
    "    # Training uses reward_clipped_env; evaluation plays on env.\n",
    "    agent.train(reward_clipped_env)\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent)\n",
    "    train_episode_rewards.append(episode_reward)\n",
    "    logging.debug('evaluate generation %d: reward = %.2f, steps = %d',\n",
    "            generation, episode_reward, elapsed_steps)\n",
    "    # Apply the stopping rule only to a full window, so that a single lucky\n",
    "    # early episode cannot end training before the average is meaningful.\n",
    "    recent_rewards = train_episode_rewards[-REWARD_WINDOW:]\n",
    "    if (len(recent_rewards) >= REWARD_WINDOW\n",
    "            and np.mean(recent_rewards) > REWARD_GOAL):\n",
    "        break\n",
    "\n",
    "# One point per generation: the reward of that generation's evaluation episode.\n",
    "fig, ax = plt.subplots()\n",
    "ax.plot(train_episode_rewards)\n",
    "ax.set(title='Evaluation reward during training',\n",
    "        xlabel='generation', ylabel='episode reward')\n",
    "\n",
    "\n",
    "logging.info('==== test ====')\n",
    "# Kept separate from the training history so that neither series is lost.\n",
    "test_episode_rewards = []\n",
    "for episode in range(TEST_EPISODES):\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent)\n",
    "    test_episode_rewards.append(episode_reward)\n",
    "    logging.debug('test episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "logging.info('average episode reward = %.2f ± %.2f',\n",
    "        np.mean(test_episode_rewards), np.std(test_episode_rewards))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
